diff --git a/.gitignore b/.gitignore index 7dc73be..49421e7 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/qemu-8.0.0.tar.xz +SOURCES/qemu-9.0.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata index a158c44..052613e 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz +6699bb03d6da21159b89668bca01c6c958b95d07 SOURCES/qemu-9.0.0.tar.xz diff --git a/SOURCES/0004-Initial-redhat-build.patch b/SOURCES/0004-Initial-redhat-build.patch index 612633e..49991a2 100644 --- a/SOURCES/0004-Initial-redhat-build.patch +++ b/SOURCES/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001 +From ea7dff3dbf979d7d8a85a16cf5187235143e1048 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-7.2.0-14.el9 +This rebase is based on qemu-kvm-8.2.0-11.el9 Signed-off-by: Miroslav Rezanina -- @@ -50,32 +50,45 @@ Rebase changes (7.0.0): - Change permissions on installing tests/Makefile.include - Remove ssh block driver -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - --disable-vnc-png renamed to --disable-png (upstream) - removed --disable-vhost-vsock and --disable-vhost-scsi - capstone submodule removed - Temporary include capstone build -Rebase changes (7.2.0 rc0): +Rebase changes (7.2.0): - Switch --enable-slirp=system to --enable-slirp - -Rebaes changes (7.2.0 rc2): - Added new configure options (blkio and sndio, both disabled) Rebase changes (7.2.0): - Fix SRPM name generation to work on Fedora 37 - Switch back to system meson -Rebase changes (8.0.0-rc1): +Rebase changes (8.0.0): - use enable-dtrace-backands instead of enable-dtrace-backend - Removed qemu virtiofsd bits - -Rebase changes (8.0.0-rc2): - test/check-block.sh removed (upstream) - -Rebase changes (8.0.0-rc3): - Add new --disable-* options for configure +Rebase changes (8.1.0): +- qmp-spec.txt installed by make +- Removed --meson configure option +- Add --disable-pypi +- Removed --with-git and -with-gitsubmodules +- Renamed --disable-pypi to --disable-downloads +- Minor updates in README.tests + +Rebase changes (8.2.0): +- Removed --disable-hax (upstream) +- Added --disable-plugins configure option +- Fixing frh.py strings + +Rebase notes (9.0.0): +- Fixed qemu-kvm binary location change +- Remove hppa-firmware64.img +- Package stp files for utilities +- Download subprojects on local build + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -168,24 +181,38 @@ Merged patches (7.0.0): - d46d2710b2 spec: Obsolete old usb redir subpackage - 6f52a50b68 spec: Obsolete ssh driver -Merged patches (7.2.0 rc4): +Merged patches (7.2.0): - 8c6834feb6 Remove opengl display device subpackages (C9S MR 124) - 0ecc97f29e spec: Add 
requires for packages with additional virtio-gpu variants (C9S MR 124) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 7754f6ba78 Minor packaging fixes - 401af56187 spec: Disable VDUSE +Merged patches (8.1.0): +- 0c2306676f Enable Linux io_uring +- b7fa6426d5 Enable libblkio block drivers +- 19f6d7a6f4 Fix virtio-blk-vhost-vdpa typo in spec file +- f356cae88f spec: Build DBUS display +- 77b763efd5 Provide elf2dmp binary in qemu-tools + +Merged patches (8.2.0): +- cd9efa221d Enable qemu-kvm-device-usb-redirec for aarch64 + +Merged patches (9.0.0 rc0): +- 25de053dbf spec: Enable zstd + Signed-off-by: Miroslav Rezanina --- - .distro/Makefile | 100 + - .distro/Makefile.common | 41 + + .distro/Makefile | 101 + + .distro/Makefile.common | 42 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 5170 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + + .distro/scripts/frh.py | 4 +- .distro/scripts/process-patches.sh | 4 + .gitignore | 1 + README.systemtap | 43 + @@ -193,7 +220,7 @@ Signed-off-by: Miroslav Rezanina scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + ui/vnc-auth-sasl.c | 2 +- - 15 files changed, 4784 insertions(+), 4 deletions(-) + 16 files changed, 5430 insertions(+), 6 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests @@ -296,5 +323,5 @@ index 47fdae5b21..2a950caa2a 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.39.1 +2.39.3 diff --git a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch index 14dd3f9..61e84a1 100644 --- a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 
+1,4 @@ -From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001 +From 780c39975b059deaee106775b6e3a240155acea3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ -22,21 +22,37 @@ Rebase notes (7.0.0): - Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG - Removed upstream devices -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - Added CONFIG_VHOST_VSOCK and CONFIG_VHOST_USER_VSOCK configs - Added CONFIG_CXL and CONFIG_CXL_MEM_DEVICE for aarch64 and x86_64 - -Rebase notes (7.1.0 rc3): - Added CONFIG_VHOST_USER_FS option (all archs) -Rebase notes (7.2.0 rc20): +Rebase notes (7.2.0): - Removed disabling a15mpcore.c as no longer needed -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9 - Inlude qemu/error-report.h in hw/display/cirrus_vga.c - Change virtiofsd dependency version +Rebase notes (8.1.0): +- Added CONFIG_PCIE_PCI_BRIDGE for x86_64 +- Disabling tcg cpus for aarch64 +- Disable CONFIG_ARM_V7M and remove related hack +- Moved aarch64 tcg cpu disabling from arm machine type commit + +Rebase notes (8.2.0): +- Disabled new a710 arm64 tcg cpu +- No longer needed hack for removal of i2c-echo +- Disable new neoverse-v2 +- Removed CONFIG_OPENGL from x86_64 config file + +Rebase notes (9.0.0 rc0): +- Split CONFIG_IDE_QDEV to CONFIG_IDE_DEV and CONFIG_IDE_BUS (upstream change) + +Rebase notes (9.0.0 rc1): +- Do not compile armv7 cpu types + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -53,32 +69,52 @@ Merged patches (7.0.0): - fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64 - c9e68ea451 Enable SGX -- RH Only -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/ich9.c chunk) - 8f663466c6 configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM - 
1bf372717a Enable virtio-iommu-pci on aarch64 - ae3f269458 Enable virtio-iommu-pci on x86_64 + +Merged patches (8.1.0): +- 8173d2eaba Disable unwanted new devices + +Merged patches (8.2.0): +- b29f66431f Enable igb on x86_64 + +Merged patches (9.0.0 rc0): +- 3889ede5d9 Compile IOMMUFD on x86_64 +- 0beb18451f Compile IOMMUFD on s390x +- 2b4b13f70d Compile IOMMUFD object on aarch64 --- .distro/qemu-kvm.spec.template | 18 +-- - .../aarch64-softmmu/aarch64-rh-devices.mak | 41 +++++++ + .../aarch64-softmmu/aarch64-rh-devices.mak | 42 +++++++ .../ppc64-softmmu/ppc64-rh-devices.mak | 37 ++++++ configs/devices/rh-virtio.mak | 10 ++ - .../s390x-softmmu/s390x-rh-devices.mak | 18 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 109 ++++++++++++++++++ - hw/arm/meson.build | 2 +- + .../s390x-softmmu/s390x-rh-devices.mak | 19 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 112 ++++++++++++++++++ + hw/arm/virt.c | 2 + hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 3 +- - hw/display/cirrus_vga.c | 7 +- + hw/cxl/meson.build | 3 +- + hw/display/cirrus_vga.c | 4 + hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/meson.build | 2 +- - target/arm/cpu_tcg.c | 10 ++ + hw/virtio/meson.build | 6 +- + target/arm/arm-qmp-cmds.c | 2 + + target/arm/cpu.c | 4 + + target/arm/cpu.h | 3 + + target/arm/cpu64.c | 12 +- + target/arm/tcg/cpu32.c | 2 + + target/arm/tcg/cpu64.c | 8 ++ + target/arm/tcg/meson.build | 4 +- target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 19 files changed, 285 insertions(+), 13 deletions(-) + tests/qtest/arm-cpu-features.c | 4 + + 28 files changed, 321 insertions(+), 17 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -87,22 +123,22 @@ Merged patches (7.1.0 rc0): diff --git 
a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..720ec0cb57 +index 0000000000..b0191d3c69 --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -0,0 +1,41 @@ +@@ -0,0 +1,42 @@ +include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y +CONFIG_ARM_GICV3_TCG=y +CONFIG_ARM_GIC=y +CONFIG_ARM_SMMUV3=y -+CONFIG_ARM_V7M=y +CONFIG_ARM_VIRT=y +CONFIG_CXL=y +CONFIG_CXL_MEM_DEVICE=y +CONFIG_EDID=y +CONFIG_PCIE_PORT=y ++CONFIG_PCIE_PCI_BRIDGE=y +CONFIG_PCI_DEVICES=y +CONFIG_PCI_TESTDEV=y +CONFIG_PFLASH_CFI01=y @@ -132,6 +168,7 @@ index 0000000000..720ec0cb57 +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak new file mode 100644 index 0000000000..dbb7d30829 @@ -193,10 +230,10 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak new file mode 100644 -index 0000000000..69a799adbd +index 0000000000..24cf6dbd03 --- /dev/null +++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -@@ -0,0 +1,18 @@ +@@ -0,0 +1,19 @@ +include ../rh-virtio.mak + +CONFIG_PCI=y @@ -215,12 +252,13 @@ index 0000000000..69a799adbd +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..668b2d0e18 +index 0000000000..d60ff1bcfc --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -0,0 +1,109 @@ +@@ -0,0 +1,112 @@ +include ../rh-virtio.mak + +CONFIG_ACPI=y @@ -258,7 +296,9 @@ index 0000000000..668b2d0e18 +CONFIG_IDE_CORE=y +CONFIG_IDE_PCI=y +CONFIG_IDE_PIIX=y -+CONFIG_IDE_QDEV=y ++CONFIG_IDE_DEV=y 
++CONFIG_IDE_BUS=y ++CONFIG_IGB_PCI_EXPRESS=y +CONFIG_IOAPIC=y +CONFIG_IOH3420=y +CONFIG_ISA_BUS=y @@ -268,7 +308,6 @@ index 0000000000..668b2d0e18 +CONFIG_MC146818RTC=y +CONFIG_MEM_DEVICE=y +CONFIG_NVDIMM=y -+CONFIG_OPENGL=y +CONFIG_PAM=y +CONFIG_PC=y +CONFIG_PCI=y @@ -282,6 +321,7 @@ index 0000000000..668b2d0e18 +CONFIG_PCSPK=y +CONFIG_PC_ACPI=y +CONFIG_PC_PCI=y ++CONFIG_PCIE_PCI_BRIDGE=y +CONFIG_PFLASH_CFI01=y +CONFIG_PVPANIC_ISA=y +CONFIG_PXB=y @@ -330,21 +370,29 @@ index 0000000000..668b2d0e18 +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y -diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index b545ba0e4f..a41a16cba7 100644 ---- a/hw/arm/meson.build -+++ b/hw/arm/meson.build -@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) - arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) - arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) - --arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) -+#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) - arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) - arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) - arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) ++CONFIG_IOMMUFD=y +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index a9a913aead..6c6d155002 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2954,6 +2954,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + static const char * const valid_cpu_types[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #ifdef CONFIG_TCG + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), +@@ -2971,6 +2972,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + #endif /* CONFIG_TCG */ + #ifdef TARGET_AARCH64 + ARM_CPU_TYPE_NAME("cortex-a53"), 
++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("cortex-a57"), + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + ARM_CPU_TYPE_NAME("host"), diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index d7cc4d3ec1..12d0a60905 100644 +index 6dd94e98bc..a05757fc9a 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -49,6 +49,8 @@ @@ -372,18 +420,32 @@ index d7cc4d3ec1..12d0a60905 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index e37490074f..4431e3731c 100644 +index 38cdcfbe57..e588ecfd42 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,4 +1,5 @@ --softmmu_ss.add(files('core.c', 'cluster.c')) -+#softmmu_ss.add(files('core.c', 'cluster.c')) -+softmmu_ss.add(files('core.c')) +-system_ss.add(files('core.c', 'cluster.c')) ++#system_ss.add(files('core.c', 'cluster.c')) ++system_ss.add(files('core.c')) - softmmu_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) - softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + system_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) +diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build +index 3e375f61a9..613adb3ebb 100644 +--- a/hw/cxl/meson.build ++++ b/hw/cxl/meson.build +@@ -6,7 +6,8 @@ system_ss.add(when: 'CONFIG_CXL', + 'cxl-host.c', + 'cxl-cdat.c', + 'cxl-events.c', +- 'switch-mailbox-cci.c', ++# Disabled for 8.2.0 rebase for RHEL 9.4.0 ++# 'switch-mailbox-cci.c', + ), + if_false: files( + 'cxl-host-stubs.c', diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index b80f98b6c4..cbde6a8f15 100644 +index 150883a971..497365bd80 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -36,6 +36,7 @@ @@ -394,31 +456,21 @@ index b80f98b6c4..cbde6a8f15 100644 #include "sysemu/reset.h" #include "qapi/error.h" #include "trace.h" -@@ -47,6 +48,7 @@ - #include "qom/object.h" - 
#include "ui/console.h" - -+ - /* - * TODO: - * - destination write mask support not complete (bits 5..7) -@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2946,6 +2947,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; -- /* -+ warn_report("'cirrus-vga' is deprecated, " -+ "please use a different VGA card instead"); ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); + -+ /* + /* * Follow real hardware, cirrus card emulated has 4 MB video memory. * Also accept 8 MB/16 MB for backward compatibility. - */ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 41d60921e3..a4af45b4e8 100644 +index 80efc633d3..9cb82b8eea 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -191,7 +191,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -428,7 +480,7 @@ index 41d60921e3..a4af45b4e8 100644 } static const TypeInfo piix3_ide_info = { -@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -215,6 +216,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -438,10 +490,10 @@ index 41d60921e3..a4af45b4e8 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index b92b63bedc..3b6235dde6 100644 +index 74f10b640f..2e85ecf476 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -957,6 +957,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -952,6 +952,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = 
&vmstate_kbd_isa; adevc->build_dev_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -451,10 +503,10 @@ index b92b63bedc..3b6235dde6 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 23d660619f..b75c9aa799 100644 +index 43f3a4a701..267f182883 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = { +@@ -1746,6 +1746,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -462,7 +514,7 @@ index 23d660619f..b75c9aa799 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = { +@@ -1758,6 +1759,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -471,10 +523,10 @@ index 23d660619f..b75c9aa799 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 8a4861f45a..fcb5dfe792 100644 +index e7c9edd033..3b0a47a28c 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -379,10 +379,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -389,10 +389,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -482,16 +534,16 @@ index 8a4861f45a..fcb5dfe792 100644 DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), - DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5p_v2.1"), +#endif DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), - DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7p_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index 599dc24f0d..905a994c3a 100644 +index aac3bb35f2..5411ff35df 100644 --- 
a/hw/usb/meson.build +++ b/hw/usb/meson.build -@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade +@@ -55,7 +55,7 @@ system_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader if cacard.found() usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', @@ -500,86 +552,226 @@ index 599dc24f0d..905a994c3a 100644 hw_usb_modules += {'smartcard': usbsmartcard_ss} endif -diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index df0c45e523..c154a4dcf2 100644 ---- a/target/arm/cpu_tcg.c -+++ b/target/arm/cpu_tcg.c -@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) - /* CPU models. These are not needed for the AArch64 linux-user build. */ - #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index d7f18c96e6..aaabbb8b0b 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -20,7 +20,8 @@ if have_vhost + system_virtio_ss.add(files('vhost-user-base.c')) -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + # MMIO Stubs +- system_virtio_ss.add(files('vhost-user-device.c')) ++# Disabled for 8.2.0 rebase for RHEL 9.4.0 ++# system_virtio_ss.add(files('vhost-user-device.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) +@@ -28,7 +29,8 @@ if have_vhost + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) + + # PCI Stubs +- system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) ++# Disabled for 8.2.0 rebase for RHEL 9.4.0 ++# 
system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], + if_true: files('vhost-user-gpio-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], +diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c +index 3cc8cc738b..6f21fea1f5 100644 +--- a/target/arm/arm-qmp-cmds.c ++++ b/target/arm/arm-qmp-cmds.c +@@ -223,6 +223,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + static void arm_cpu_add_definition(gpointer data, gpointer user_data) { -@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41093000; - define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -231,6 +232,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info = g_malloc0(sizeof(*info)); + info->name = cpu_model_from_type(typename); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); } -+#endif /* disabled for RHEL */ +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index ab8d007a86..e5dce20f19 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2546,6 +2546,10 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) - #ifndef CONFIG_USER_ONLY - static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - }; + acc->info = data; + cc->gdb_core_xml_file = "arm-core.xml"; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void arm_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu.h 
b/target/arm/cpu.h +index bc0c84873f..e9472c8bb8 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -37,6 +37,8 @@ + #define KVM_HAVE_MCE_INJECTION 1 + #endif + ++#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" ++ + #define EXCP_UDEF 1 /* undefined instruction */ + #define EXCP_SWI 2 /* software interrupt */ + #define EXCP_PREFETCH_ABORT 3 +@@ -1092,6 +1094,7 @@ typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); ++ const char *deprecation_note; + } ARMCPUInfo; + + /** +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 985b1efe16..46a4e80171 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -648,6 +648,7 @@ static void aarch64_a57_initfn(Object *obj) + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_a7_initfn(Object *obj) + static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41072000; - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ +@@ -704,6 +705,7 @@ static void aarch64_a53_initfn(Object *obj) + cpu->gic_pribits = 5; + define_cortex_a72_a57_a53_cp_reginfo(cpu); } -+#endif /* disabled for RHEL */ ++#endif - static void cortex_a15_initfn(Object *obj) + static void aarch64_host_initfn(Object *obj) { -@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj) - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); +@@ -742,8 +744,11 @@ static void aarch64_max_initfn(Object *obj) } + static const ARMCPUInfo aarch64_cpus[] = { +- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, ++ .deprecation_note = RHEL_CPU_DEPRECATION }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_m0_initfn(Object *obj) + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, ++#endif /* disabled for 
RHEL */ + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +@@ -814,8 +819,13 @@ static void aarch64_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); - cc->gdb_core_xml_file = "arm-m-profile.xml"; + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void aarch64_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c +index de8f2be941..8896295ae3 100644 +--- a/target/arm/tcg/cpu32.c ++++ b/target/arm/tcg/cpu32.c +@@ -92,6 +92,7 @@ void aa32_max_features(ARMCPU *cpu) + cpu->isar.id_dfr1 = t; } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + +@@ -1037,3 +1038,4 @@ static void arm_tcg_cpu_register_types(void) + type_init(arm_tcg_cpu_register_types) + + #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ +diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c +index 9f7a9f3d2c..7ec6851c9c 100644 +--- a/target/arm/tcg/cpu64.c ++++ b/target/arm/tcg/cpu64.c +@@ -29,6 +29,7 @@ + #include "cpu-features.h" + #include "cpregs.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize, + unsigned cachesize) + { +@@ -134,6 +135,7 @@ static void aarch64_a35_initfn(Object *obj) + /* These values are the same with A53/A57/A72. 
*/ + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } ++#endif + + static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +@@ -223,6 +225,7 @@ static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name, + static Property arm_cpu_lpa2_property = + DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a55_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -1065,6 +1068,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); + } ++#endif - #ifndef TARGET_AARCH64 /* -@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj) - #endif /* !TARGET_AARCH64 */ + * -cpu max: a CPU with as many features enabled as our emulation supports. +@@ -1271,6 +1275,7 @@ void aarch64_max_tcg_initfn(Object *obj) + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); + } - static const ARMCPUInfo arm_tcg_cpus[] = { +#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "cortex-a7", .initfn = cortex_a7_initfn }, - { .name = "cortex-a8", .initfn = cortex_a8_initfn }, - { .name = "cortex-a9", .initfn = cortex_a9_initfn }, -+#endif /* disabled for RHEL */ - { .name = "cortex-a15", .initfn = cortex_a15_initfn }, + static const ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, + { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, +@@ -1282,14 +1287,17 @@ static const ARMCPUInfo aarch64_cpus[] = { + { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, + { .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn }, + }; ++#endif + + static void aarch64_cpu_register_types(void) + { +#if 0 /* 
Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, -+#endif /* disabled for RHEL */ - #ifndef TARGET_AARCH64 - { .name = "max", .initfn = arm_max_initfn }, - #endif + size_t i; + + for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { + aarch64_cpu_register(&aarch64_cpus[i]); + } ++#endif + } + + type_init(aarch64_cpu_register_types) +diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build +index 3b1a9f0fc5..6898b4de6f 100644 +--- a/target/arm/tcg/meson.build ++++ b/target/arm/tcg/meson.build +@@ -56,5 +56,5 @@ arm_system_ss.add(files( + 'psci.c', + )) + +-arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) +-arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) ++#arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) ++#arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 912b037c63..cd3ff700ac 100644 +index f2301b43f7..f77ebfcc81 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -603,13 +795,13 @@ index 912b037c63..cd3ff700ac 100644 POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, @@ -718,6 +721,7 @@ "PowerPC 970MP v1.1") - POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + POWERPC_DEF("power5p_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, "POWER5+ v2.1") +#endif POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") - POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -896,12 +900,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + POWERPC_DEF("power7p_v2.1", CPU_POWERPC_POWER7P_v21, 
POWER7, +@@ -894,13 +898,16 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -619,12 +811,13 @@ index 912b037c63..cd3ff700ac 100644 { "970", "970_v2.2" }, { "970fx", "970fx_v3.1" }, { "970mp", "970mp_v1.1" }, - { "power5+", "power5+_v2.1" }, + { "power5+", "power5p_v2.1" }, + { "power5+_v2.1", "power5p_v2.1" }, { "power5gs", "power5+_v2.1" }, +#endif { "power7", "power7_v2.3" }, - { "power7+", "power7+_v2.1" }, - { "power8e", "power8e_v2.1" }, + { "power7+", "power7p_v2.1" }, + { "power7+_v2.1", "power7p_v2.1" }, @@ -911,12 +918,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power10", "power10_v2.0" }, #endif @@ -641,10 +834,10 @@ index 912b037c63..cd3ff700ac 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index 63981bf36b..87a4480c05 100644 +index 2d99218069..0728bfcc20 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c -@@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -34,6 +34,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -655,10 +848,10 @@ index 63981bf36b..87a4480c05 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 3ac7ec9acf..97da1a6424 100644 +index 4ce809c5d4..55fb4855b1 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2529,6 +2529,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2565,6 +2565,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -673,6 +866,37 @@ index 3ac7ec9acf..97da1a6424 100644 prop.cpuid = s390_cpuid_from_cpu_model(model); prop.ibc = 
s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ +diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c +index 9d6e6190d5..f822526acb 100644 +--- a/tests/qtest/arm-cpu-features.c ++++ b/tests/qtest/arm-cpu-features.c +@@ -452,8 +452,10 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); ++#endif /* disabled for RHEL */ + + /* Enabling and disabling pmu should always work. */ + assert_has_feature_enabled(qts, "max", "pmu"); +@@ -470,6 +472,7 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a57", "pmu"); + assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "a64fx", "pmu"); + assert_has_feature_enabled(qts, "a64fx", "aarch64"); + /* +@@ -482,6 +485,7 @@ static void test_query_cpu_model_expansion(const void *data) + "{ 'sve384': true }"); + assert_error(qts, "a64fx", "cannot enable sve640", + "{ 'sve640': true }"); ++#endif /* disabled for RHEL */ + + sve_tests_default(qts, "max"); + pauth_tests_default(qts, "max"); -- -2.39.1 +2.39.3 diff --git a/SOURCES/0006-Machine-type-related-general-changes.patch b/SOURCES/0006-Machine-type-related-general-changes.patch index 5dd591f..e0c3795 100644 --- a/SOURCES/0006-Machine-type-related-general-changes.patch +++ b/SOURCES/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001 +From 8e6a30073f9c1a5d6294b2d16556522453e227e7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type 
related general changes @@ -19,10 +19,19 @@ Rebase notes (7.0.0): - Remove downstream changes leftovers in hw/rtc/mc146818rtc.c - Remove unnecessary change in hw/usb/hcd-uhci.c -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - Moved adding rhel_old_machine_deprecation variable from s390x to general machine types commit - Moved adding hw_compat_rhel_8_6 struct from x86_64 to general machine types commit +Rebase notes (8.1.0): +- Do not modify unused vga-isa.c + +Rebase notes (9.0.0 rc0): +- Updated smsbios handling + +Rebase notes (9.0.0 rc4): +- Moving downstream compat changes + Merged patches (6.1.0): - f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 - 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 @@ -40,67 +49,76 @@ Merged patches (7.0.0): - ef5afcc86d Fix virtio-net-pci* "vectors" compat - 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/piix4.c chunk) - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/core/machine.c and include/hw/boards.h chunk) -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 21ed34787b Addd 7.2 compat bits for RHEL 9.1 machine type - e5c8d5d603 virtio-rng-pci: fix migration compat for vectors - 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (general part) +- 1165e24c6b hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0 + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) + +Merged patches (9.0.0 rc0): +- 4b8fe42abc virtio-mem: default-enable "dynamic-memslots" --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++ - hw/display/vga-isa.c | 2 +- - hw/i386/pc_piix.c | 2 + - hw/i386/pc_q35.c | 2 + + hw/core/machine.c | 269 +++++++++++++++++++++++++++++++++++ + hw/i386/fw_cfg.c | 3 +- hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 46 ++++++- + hw/smbios/smbios.c | 46 +++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 59 ++++++--- + hw/usb/hcd-xhci-pci.c | 59 ++++++-- hw/usb/hcd-xhci-pci.h | 1 + - include/hw/boards.h | 31 +++++ - include/hw/firmware/smbios.h | 5 +- + hw/virtio/virtio-mem.c | 3 +- + include/hw/boards.h | 40 ++++++ + include/hw/firmware/smbios.h | 4 +- include/hw/i386/pc.h | 3 + - 14 files changed, 367 insertions(+), 23 deletions(-) + 13 files changed, 414 insertions(+), 24 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 63d2113b86..a24b9aac92 100644 +index debe1adb84..e8ddcd716e 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) +@@ -245,7 +245,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, - .minimum_version_id = 3, + .minimum_version_id = 2, .post_load = vmstate_acpi_post_load, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ac626b3bef..4a6e89c7bc 100644 +index 6c6d155002..36e9b4b4e9 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1651,7 +1651,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, - vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, -- true, SMBIOS_ENTRY_POINT_TYPE_64); -+ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); + vmc->smbios_old_sys_ver ? "1.0" : mc->name, +- true); ++ true, NULL, NULL); /* build the array of physical mem area from base_memmap */ mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index cd13b8b0a3..5aa567fad3 100644 +index 37ede0e7d4..695cb89a46 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = { +@@ -296,6 +296,275 @@ GlobalProperty hw_compat_2_1[] = { }; - const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1); +/* + * RHEL only: machine types for previous major releases are deprecated @@ -108,6 +126,46 @@ index cd13b8b0a3..5aa567fad3 100644 +const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_4[] = { ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "host_uso", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "guest_uso4", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "guest_uso6", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "ramfb", "x-migrate", "off" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "igb", "x-pcie-flr-init", "off" }, ++ /* hw_compat_rhel_9_4 jira RHEL-24045 */ ++ { "virtio-mem", "dynamic-memslots", "off" }, ++}; ++const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); ++ ++GlobalProperty hw_compat_rhel_9_3[] = { ++ /* hw_compat_rhel_9_3 from hw_compat_8_0 */ ++ { "migration", "multifd-flush-after-each-section", "on"}, ++ /* 
hw_compat_rhel_9_3 from hw_compat_8_0 */ ++ { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" }, ++}; ++const size_t hw_compat_rhel_9_3_len = G_N_ELEMENTS(hw_compat_rhel_9_3); ++ ++GlobalProperty hw_compat_rhel_9_2[] = { ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "e1000e", "migrate-timadj", "off" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "virtio-mem", "x-early-migration", "false" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "migration", "x-preempt-pre-7-2", "true" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, ++}; ++const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); ++ +/* + * Mostly the same as hw_compat_7_0 + */ @@ -331,53 +389,28 @@ index cd13b8b0a3..5aa567fad3 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + - GlobalProperty hw_compat_7_1[] = { - { "virtio-device", "queue_reset", "false" }, - { "virtio-rng-pci", "vectors", "0" }, -diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 2a5437d803..0db2c2b2a1 100644 ---- a/hw/display/vga-isa.c -+++ b/hw/display/vga-isa.c -@@ -89,7 +89,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) - } - - static Property vga_isa_properties[] = { -- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), -+ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 30eedd62a3..14a794081e 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine, - smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", - mc->name, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - pcms->smbios_entry_point_type); - } + MachineState *current_machine; -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 
797ba347fd..dc0ba5f9e7 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) - smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", - mc->name, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - pcms->smbios_entry_point_type); + static char *machine_get_kernel(Object *obj, Error **errp) +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index d802d2787f..c7aa39a13e 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -64,7 +64,8 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ + smbios_set_defaults("QEMU", mc->desc, mc->name, +- pcmc->smbios_uuid_encoded); ++ pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, pcmc->smbios_stream_version); } + /* tell smbios about cpuid version and features */ diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 5a5aaf868d..3d473d5869 100644 +index 897c86ec41..2d0db43f49 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3169,7 +3169,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -386,7 +419,7 @@ index 5a5aaf868d..3d473d5869 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3259,7 +3259,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3250,7 +3250,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -397,20 +430,21 @@ index 5a5aaf868d..3d473d5869 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index d2007e70fb..319eae9e9d 100644 +index 
eed5787b15..68608a3403 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -58,6 +58,9 @@ static bool smbios_legacy = true; - static bool smbios_uuid_encoded = true; - /* end: legacy structures & constants for <= 2.0 machines */ +@@ -39,6 +39,10 @@ size_t usr_blobs_len; + static unsigned usr_table_max; + static unsigned usr_table_cnt; +/* Set to true for modern Windows 10 HardwareID-6 compat */ +static bool smbios_type2_required; + - ++ uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -670,7 +673,7 @@ static void smbios_build_type_1_table(void) + unsigned smbios_table_max; +@@ -629,7 +633,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -419,21 +453,17 @@ index d2007e70fb..319eae9e9d 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -1018,16 +1022,52 @@ void smbios_set_default_processor_family(uint16_t processor_family) void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type) + const char *version, +- bool uuid_encoded) + bool uuid_encoded, + const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type) ++ const char *stream_version) { smbios_have_defaults = true; - smbios_legacy = legacy_mode; -@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - g_free(smbios_entries); - } + smbios_uuid_encoded = uuid_encoded; + /* + * If @stream_product & @stream_version are non-NULL, then @@ -460,12 +490,12 @@ index d2007e70fb..319eae9e9d 100644 + * + * We get 'System Manufacturer' and 'Baseboard Manufacturer' + */ - SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type1.product, product); - SMBIOS_SET_DEFAULT(type1.version, version); -+ 
SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); + SMBIOS_SET_DEFAULT(smbios_type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(smbios_type1.product, product); + SMBIOS_SET_DEFAULT(smbios_type1.version, version); ++ SMBIOS_SET_DEFAULT(smbios_type1.family, "Red Hat Enterprise Linux"); + if (stream_version != NULL) { -+ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ SMBIOS_SET_DEFAULT(smbios_type1.sku, stream_version); + } SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type2.product, product); @@ -479,20 +509,20 @@ index d2007e70fb..319eae9e9d 100644 SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b497..32935da46c 100644 +index 28fdabc321..bad13ec224 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c -@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { +@@ -229,7 +229,7 @@ static const VMStateDescription vmstate_pit_common = { .pre_save = pit_dispatch_pre_save, .post_load = pit_dispatch_post_load, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { - VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), + VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c -index 643d4643e4..529bad9366 100644 +index 4423983308..43b4b71fdf 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) @@ -602,14 +632,38 @@ index 08f70ce97c..1be7527c1b 100644 } XHCIPciState; #endif +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index ffd119ebac..0e2be2219c 100644 +--- a/hw/virtio/virtio-mem.c ++++ 
b/hw/virtio/virtio-mem.c +@@ -1694,8 +1694,9 @@ static Property virtio_mem_properties[] = { + #endif + DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, + early_migration, true), ++ /* RHEL: default-enable "dynamic-memslots" (jira RHEL-24045) */ + DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, +- dynamic_memslots, false), ++ dynamic_memslots, true), + DEFINE_PROP_END_OF_LIST(), + }; + diff --git a/include/hw/boards.h b/include/hw/boards.h -index 6fbbfd56c8..c5a965d27f 100644 +index 8b8f6d5c00..0466f9d0f3 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -459,4 +459,35 @@ extern const size_t hw_compat_2_2_len; +@@ -512,4 +512,44 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_9_4[]; ++extern const size_t hw_compat_rhel_9_4_len; ++ ++extern GlobalProperty hw_compat_rhel_9_3[]; ++extern const size_t hw_compat_rhel_9_3_len; ++ ++extern GlobalProperty hw_compat_rhel_9_2[]; ++extern const size_t hw_compat_rhel_9_2_len; ++ +extern GlobalProperty hw_compat_rhel_9_1[]; +extern const size_t hw_compat_rhel_9_1_len; + @@ -643,29 +697,28 @@ index 6fbbfd56c8..c5a965d27f 100644 +extern const char *rhel_old_machine_deprecation; #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 7f3259a630..d24b3ccd32 100644 +index 8d3fb2fb3b..d9d6d7a169 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h -@@ -294,7 +294,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); +@@ -332,7 +332,9 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); void smbios_set_cpuid(uint32_t version, uint32_t features); void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type); + const char *version, +- bool uuid_encoded); + bool uuid_encoded, + const char *stream_product, -+ const char 
*stream_version, -+ SmbiosEntryPointType ep_type); - uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); ++ const char *stream_version); + void smbios_set_default_processor_family(uint16_t processor_family); + uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 8206d5405a..908a275736 100644 +index 27a68071d7..ebd8f973f2 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -111,6 +111,9 @@ struct PCMachineClass { - bool smbios_defaults; +@@ -112,6 +112,9 @@ struct PCMachineClass { bool smbios_legacy_mode; bool smbios_uuid_encoded; + SmbiosEntryPointType default_smbios_ep_type; + /* New fields needed for Windows HardwareID-6 matching */ + const char *smbios_stream_product; + const char *smbios_stream_version; @@ -673,5 +726,5 @@ index 8206d5405a..908a275736 100644 /* RAM / address space compat: */ bool gigabyte_align; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0007-Add-aarch64-machine-types.patch b/SOURCES/0007-Add-aarch64-machine-types.patch index f47bbd0..a556bb2 100644 --- a/SOURCES/0007-Add-aarch64-machine-types.patch +++ b/SOURCES/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001 +From cf398296f3fcee185a00f23de5deae57c97d648e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -17,18 +17,22 @@ Rebase notes (7.0.0): - Added dtb-kaslr-seed option - Set no_tcg_lpa2 to true -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - replace dtb_kaslr_seed by dtb_randomness - -Rebase notes (7.1.0 rc3): - Updated dtb_randomness comment -Rebase notes (7.2.0 rc0): +Rebase notes (7.2.0): - Disabled cortex-a35 -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c +Rebase notes 
(8.1.0): +- Added setting default_nic + +Rebase notes (9.0.0 rc0): +- call arm_virt_compat_set on rhel type class_init + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -46,52 +50,84 @@ Merged patches (7.0.0): - f79b31bdef hw/arm/virt: Remove the dtb-kaslr-seed machine option - b6fca85f4a hw/arm/virt: Fix missing initialization in instance/class_init() -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - ac97dd4f9f RHEL-only: AArch64: Drop unsupported CPU types - e9c0a70664 target/arm: deprecate named CPU models -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type - d97cd7c513 redhat: fix virt-rhel9.2.0 compat props + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (arm part) +- c07f666086 hw/arm/virt: Validate cluster and NUMA node boundary for RHEL machines + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) + +Merged patches (9.0.0 rc0): +- 117068376a hw/arm/virt: Fix compats +- 8bcccfabc4 hw/arm/virt: Add properties to disable high memory regions +- 0005a8b93a hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types --- - hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 8 ++ - target/arm/arm-qmp-cmds.c | 2 + - target/arm/cpu-qom.h | 1 + - target/arm/cpu.c | 5 + - target/arm/cpu.h | 2 + - target/arm/cpu64.c | 16 ++- - target/arm/cpu_tcg.c | 12 +- - tests/qtest/arm-cpu-features.c | 6 + - 9 files changed, 289 insertions(+), 14 deletions(-) + hw/arm/virt.c | 299 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 ++ + 2 files changed, 306 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 4a6e89c7bc..1ae1654be5 100644 +index 36e9b4b4e9..22bc345137 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -81,6 +81,7 @@ - #include "hw/char/pl011.h" - #include "qemu/guest-random.h" +@@ -101,6 +101,7 @@ static void arm_virt_compat_set(MachineClass *mc) + arm_virt_compat_len); + } +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -107,7 +108,48 @@ +@@ -128,7 +129,63 @@ static void arm_virt_compat_set(MachineClass *mc) DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) -- +#endif /* disabled for RHEL */ + ++/* ++ * This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. They may be overriden by older machine compats. ++ * ++ * virtio-net-pci variant romfiles are not needed because edk2 does ++ * fully support the pxe boot. Besides virtio romfiles are not shipped ++ * on rhel/aarch64. 
++ */ ++GlobalProperty arm_rhel_compat[] = { ++ {"virtio-net-pci", "romfile", "" }, ++ {"virtio-net-pci-transitional", "romfile", "" }, ++ {"virtio-net-pci-non-transitional", "romfile", "" }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + ++/* ++ * This cannot be called from the rhel_virt_class_init() because ++ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new() ++ * only is called on virt-rhelm.n.s non abstract class init. ++ */ ++static void arm_rhel_compat_set(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, ++ arm_rhel_compat_len); ++} ++ +#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ + static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ ++ arm_rhel_compat_set(mc); \ + rhel##m##n##s##_virt_options(mc); \ + mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ + if (latest) { \ @@ -114,44 +150,10 @@ index 4a6e89c7bc..1ae1654be5 100644 + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) +#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) -+ -+/* This variable is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. 
-+ */ -+GlobalProperty arm_rhel_compat[] = { -+ { -+ .driver = "virtio-net-pci", -+ .property = "romfile", -+ .value = "", -+ }, -+}; -+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -204,16 +246,20 @@ static const int a15irqmap[] = { - }; - - static const char *valid_cpus[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - ARM_CPU_TYPE_NAME("cortex-a7"), - ARM_CPU_TYPE_NAME("cortex-a15"), - ARM_CPU_TYPE_NAME("cortex-a35"), - ARM_CPU_TYPE_NAME("cortex-a53"), - ARM_CPU_TYPE_NAME("cortex-a55"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("cortex-a57"), -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - ARM_CPU_TYPE_NAME("cortex-a72"), - ARM_CPU_TYPE_NAME("cortex-a76"), - ARM_CPU_TYPE_NAME("a64fx"), - ARM_CPU_TYPE_NAME("neoverse-n1"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("host"), - ARM_CPU_TYPE_NAME("max"), - }; -@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine) +@@ -2355,6 +2412,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -159,7 +161,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2382,6 +2440,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -167,25 +169,31 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) - +@@ -2397,6 +2456,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } -- + +#if 0 /* Disabled for Red Hat Enterprise Linux */ static bool virt_get_compact_highmem(Object *obj, Error **errp) { VirtMachineState *vms = 
VIRT_MACHINE(obj); -@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) +@@ -2410,6 +2470,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) - vms->highmem_mmio = value; + vms->highmem_compact = value; } -- +#endif /* disabled for RHEL */ + static bool virt_get_highmem_redists(Object *obj, Error **errp) + { +@@ -2453,7 +2514,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) + vms->highmem_mmio = value; + } + +- static bool virt_get_its(Object *obj, Error **errp) { -@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2468,6 +2528,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -193,7 +201,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2481,6 +2542,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -201,7 +209,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2564,6 +2626,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -209,7 +217,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2577,6 +2640,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -217,7 +225,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2988,6 +3040,7 @@ 
static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2949,6 +3013,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 0 : requested_pa_size; } @@ -225,7 +233,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3463,3 +3528,235 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -235,6 +243,7 @@ index 4a6e89c7bc..1ae1654be5 100644 +{ + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ arm_virt_compat_set(mc); + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; @@ -263,7 +272,10 @@ index 4a6e89c7bc..1ae1654be5 100644 + mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; ++ mc->default_nic = "virtio-net-pci"; + + object_class_property_add(oc, "acpi", "OnOffAuto", + virt_get_acpi, virt_set_acpi, @@ -277,6 +289,28 @@ index 4a6e89c7bc..1ae1654be5 100644 + "Set on/off to enable/disable using " + "physical address space above 32 bits"); + ++ object_class_property_add_bool(oc, "highmem-redists", ++ virt_get_highmem_redists, ++ virt_set_highmem_redists); ++ object_class_property_set_description(oc, "highmem-redists", ++ "Set on/off to enable/disable high " ++ "memory region for GICv3 or GICv4 " ++ "redistributor"); ++ ++ object_class_property_add_bool(oc, "highmem-ecam", ++ virt_get_highmem_ecam, ++ virt_set_highmem_ecam); ++ object_class_property_set_description(oc, "highmem-ecam", ++ "Set on/off to enable/disable high " ++ "memory region for PCI ECAM"); ++ ++ object_class_property_add_bool(oc, "highmem-mmio", ++ 
virt_get_highmem_mmio, ++ virt_set_highmem_mmio); ++ object_class_property_set_description(oc, "highmem-mmio", ++ "Set on/off to enable/disable high " ++ "memory region for PCI MMIO"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", @@ -401,11 +435,24 @@ index 4a6e89c7bc..1ae1654be5 100644 +} +type_init(rhel_machine_init); + ++static void rhel940_virt_options(MachineClass *mc) ++{ ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) ++ +static void rhel920_virt_options(MachineClass *mc) +{ -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel940_virt_options(mc); ++ ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); ++ ++ /* RHEL 9.4 is the first supported release */ ++ mc->deprecation_reason = ++ "machine types for versions prior to 9.4 are deprecated"; +} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++DEFINE_RHEL_MACHINE(9, 2, 0) + +static void rhel900_virt_options(MachineClass *mc) +{ @@ -414,6 +461,7 @@ index 4a6e89c7bc..1ae1654be5 100644 + rhel920_virt_options(mc); + + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; @@ -422,10 +470,10 @@ index 4a6e89c7bc..1ae1654be5 100644 +} +DEFINE_RHEL_MACHINE(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index e1ddbea96b..81c2363a40 100644 +index bb486d36b1..237fc77bda 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -187,9 +187,17 @@ struct VirtMachineState { +@@ -179,9 +179,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) 
(high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -443,270 +491,6 @@ index e1ddbea96b..81c2363a40 100644 void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); -diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c -index c8fa524002..3aa089abf3 100644 ---- a/target/arm/arm-qmp-cmds.c -+++ b/target/arm/arm-qmp-cmds.c -@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, - static void arm_cpu_add_definition(gpointer data, gpointer user_data) - { - ObjectClass *oc = data; -+ CPUClass *cc = CPU_CLASS(oc); - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; -@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); -+ info->deprecated = !!cc->deprecation_note; - - QAPI_LIST_PREPEND(*cpu_list, info); - } -diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h -index 514c22ced9..f789173451 100644 ---- a/target/arm/cpu-qom.h -+++ b/target/arm/cpu-qom.h -@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { - const char *name; - void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); -+ const char *deprecation_note; - } ARMCPUInfo; - - void arm_cpu_register(const ARMCPUInfo *info); -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5182ed0c91..6740a8b940 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void arm_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 
c097cae988..829d4a2328 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -34,6 +34,8 @@ - #define KVM_HAVE_MCE_INJECTION 1 - #endif - -+#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" -+ - #define EXCP_UDEF 1 /* undefined instruction */ - #define EXCP_SWI 2 /* software interrupt */ - #define EXCP_PREFETCH_ABORT 3 -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 0fb07cc7b6..47459627fb 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -31,6 +31,7 @@ - #include "hw/qdev-properties.h" - #include "internals.h" - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a35_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj) - /* These values are the same with A53/A57/A72. */ - define_cortex_a72_a57_a53_cp_reginfo(cpu); - } -+#endif /* disabled for RHEL */ - - void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) - { -@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj) - define_cortex_a72_a57_a53_cp_reginfo(cpu); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a53_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) - - define_neoverse_n1_cp_reginfo(cpu); - } -+#endif /* disabled for RHEL */ - - static void aarch64_host_initfn(Object *obj) - { -@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj) - } - - static const ARMCPUInfo aarch64_cpus[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, -- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, -+#endif /* disabled for RHEL */ -+ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, -+ .deprecation_note = RHEL_CPU_DEPRECATION }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, - { .name = 
"cortex-a72", .initfn = aarch64_a72_initfn }, - { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, - { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, - { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, -+#endif /* disabled for RHEL */ - { .name = "max", .initfn = aarch64_max_initfn }, - #if defined(CONFIG_KVM) || defined(CONFIG_HVF) - { .name = "host", .initfn = aarch64_host_initfn }, -@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void aarch64_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index c154a4dcf2..f29425b656 100644 ---- a/target/arm/cpu_tcg.c -+++ b/target/arm/cpu_tcg.c -@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) - } - #endif /* !CONFIG_USER_ONLY */ - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ - #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) - { -@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41093000; - define_arm_cp_regs(cpu, cortexa9_cp_reginfo); - } --#endif /* disabled for RHEL */ - - #ifndef CONFIG_USER_ONLY - static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - }; - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_a7_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41072000; - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ - } --#endif /* disabled for RHEL */ - - static void cortex_a15_initfn(Object *obj) - { -@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj) - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_m0_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) - - cc->gdb_core_xml_file = "arm-m-profile.xml"; - } --#endif /* disabled for RHEL */ - - #ifndef TARGET_AARCH64 - /* -@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj) - #endif /* !TARGET_AARCH64 */ - - static const ARMCPUInfo arm_tcg_cpus[] = { --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "cortex-a7", .initfn = cortex_a7_initfn }, - { 
.name = "cortex-a8", .initfn = cortex_a8_initfn }, - { .name = "cortex-a9", .initfn = cortex_a9_initfn }, --#endif /* disabled for RHEL */ - { .name = "cortex-a15", .initfn = cortex_a15_initfn }, --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, --#endif /* disabled for RHEL */ - #ifndef TARGET_AARCH64 - { .name = "max", .initfn = arm_max_initfn }, - #endif -@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void) - type_init(arm_tcg_cpu_register_types) - - #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ -+#endif /* disabled for RHEL */ -diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index 1cb08138ad..834497dfec 100644 ---- a/tests/qtest/arm-cpu-features.c -+++ b/tests/qtest/arm-cpu-features.c -@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data) - assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); - - /* Test expected feature presence/absence for some cpu types */ -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); -+#endif /* disabled for RHEL */ - - /* Enabling and disabling pmu should always work. 
*/ - assert_has_feature_enabled(qts, "max", "pmu"); -@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data) - assert_has_feature_enabled(qts, "cortex-a57", "pmu"); - assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "a64fx", "pmu"); - assert_has_feature_enabled(qts, "a64fx", "aarch64"); - /* -@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data) - "{ 'sve384': true }"); - assert_error(qts, "a64fx", "cannot enable sve640", - "{ 'sve640': true }"); -+#endif /* disabled for RHEL */ - - sve_tests_default(qts, "max"); - pauth_tests_default(qts, "max"); -@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - QDict *resp; - char *error; - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_error(qts, "cortex-a15", - "We cannot guarantee the CPU type 'cortex-a15' works " - "with KVM on this host", NULL); -+#endif /* disabled for RHEL */ - - assert_has_feature_enabled(qts, "host", "aarch64"); - -- -2.39.1 +2.39.3 diff --git a/SOURCES/0008-Add-ppc64-machine-types.patch b/SOURCES/0008-Add-ppc64-machine-types.patch index ab78cae..87fcb3a 100644 --- a/SOURCES/0008-Add-ppc64-machine-types.patch +++ b/SOURCES/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001 +From fb905dbe5b51ed899062ef99a2dd7f238d3e3384 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -20,7 +20,7 @@ Merged patches (6.1.0): - af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - baa6790171 target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL --- hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ @@ -34,20 +34,20 @@ Merged patches (7.1.0 
rc0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 4921198b9d..e24b3e22e3 100644 +index e9bc97fee0..a258d81846 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) - +@@ -1718,6 +1718,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); + spapr_nested_reset(spapr); + if (spapr->svm_allowed) { + kvmppc_svm_allow(&error_fatal); + } first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3421,6 +3424,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj) +@@ -3499,6 +3516,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index 4921198b9d..e24b3e22e3 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4754,6 +4777,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index 4921198b9d..e24b3e22e3 100644 } static const TypeInfo spapr_machine_info = { -@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4805,6 +4829,7 
@@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-8.0 + * pseries-9.0 */ -@@ -4894,6 +4919,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4998,6 +5023,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,8 +105,8 @@ index 4921198b9d..e24b3e22e3 100644 /* * pseries-4.0 -@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, - *nv2atsd = 0; +@@ -5013,6 +5039,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + } return true; } + @@ -114,7 +114,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5338,6 +5366,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -337,7 +337,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index fcb5dfe792..ab8fb5bf62 100644 +index 3b0a47a28c..375e0c8e45 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -25,6 +25,7 @@ @@ -348,7 +348,7 @@ index fcb5dfe792..ab8fb5bf62 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -259,6 +260,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -264,6 +265,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -356,7 +356,7 @@ index fcb5dfe792..ab8fb5bf62 100644 if (!qdev_realize(DEVICE(cpu), NULL, errp)) { return false; -@@ -270,6 +272,17 @@ static bool 
spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -280,6 +282,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, /* Set time-base frequency to 512 MHz. vhyp must be set first. */ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); @@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 5c8aabd444..04489d5808 100644 +index 4aaf23d28f..3233c54d11 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -155,6 +155,7 @@ struct SpaprMachineClass { +@@ -157,6 +157,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; @@ -386,7 +386,7 @@ index 5c8aabd444..04489d5808 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -257,6 +258,9 @@ struct SpaprMachineState { +@@ -259,6 +260,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -397,7 +397,7 @@ index 5c8aabd444..04489d5808 100644 char *kvm_type; char *host_model; diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7949a24f5a..f207a9ba01 100644 +index ebef2cccec..ff2c00c60e 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -422,10 +422,10 @@ index 7949a24f5a..f207a9ba01 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index cd3ff700ac..1cb49c8087 100644 +index f77ebfcc81..18e9422006 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c -@@ -746,6 +746,7 @@ +@@ -744,6 +744,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h 
b/target/ppc/cpu.h -index 557d736dab..6646ec1c27 100644 +index 67e6b2effd..11187aeb93 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1482,6 +1482,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1655,6 +1655,7 @@ static inline int ppc_env_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,18 +446,18 @@ index 557d736dab..6646ec1c27 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 78f6fc50cd..68d06c3f8f 100644 +index 8231feb2d4..59f640cf7b 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv; - static int cap_large_decr; +@@ -89,6 +89,7 @@ static int cap_large_decr; static int cap_fwnmi; static int cap_rpt_invalidate; + static int cap_ail_mode_3; +static int cap_ppc_secure_guest; static uint32_t debug_inst_opcode; -@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -141,6 +142,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -465,8 +465,8 @@ index 78f6fc50cd..68d06c3f8f 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void) - return cap_rpt_invalidate; +@@ -2564,6 +2566,16 @@ bool kvmppc_supports_ail_3(void) + return cap_ail_mode_3; } +bool kvmppc_has_cap_secure_guest(void) @@ -482,7 +482,7 @@ index 78f6fc50cd..68d06c3f8f 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2964,3 +2976,18 @@ bool kvm_arch_cpu_check_are_resettable(void) void 
kvm_arch_accel_class_init(ObjectClass *oc) { } @@ -502,27 +502,27 @@ index 78f6fc50cd..68d06c3f8f 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index 5fd9753953..b5ebfe2be0 100644 +index 1975fb5ee6..d1017f98be 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h -@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); +@@ -46,6 +46,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); +void kvmppc_svm_allow(Error **errp); - #ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void); - int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, +@@ -79,6 +80,8 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); int kvmppc_has_cap_rpt_invalidate(void); + bool kvmppc_supports_ail_3(void); int kvmppc_enable_hwrng(void); +bool kvmppc_has_cap_secure_guest(void); +int kvmppc_enable_cap_secure_guest(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) +@@ -427,6 +430,16 @@ static inline bool kvmppc_supports_ail_3(void) return false; } @@ -540,5 +540,5 @@ index 5fd9753953..b5ebfe2be0 100644 { return -1; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0009-Add-s390x-machine-types.patch b/SOURCES/0009-Add-s390x-machine-types.patch index 07dfb57..b9709f1 100644 --- a/SOURCES/0009-Add-s390x-machine-types.patch +++ b/SOURCES/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001 +From 04178c77cfe188b4eed9c08a0bf66842e61fe5dc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 
Subject: Add s390x machine types @@ -8,7 +8,7 @@ Adding changes to add RHEL machine types for s390x architecture. Signed-off-by: Miroslav Rezanina -- -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - Moved adding rhel_old_machine_deprecation variable to general machine types commit Merged patches (6.1.0): @@ -23,52 +23,74 @@ Merged patches (7.0.0): - 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x - dcc64971bf RHEL: mark old machine types as deprecated (partialy) -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/s390x/s390-virtio-ccw.c chunk) - c8ad21ca31 redhat: Update s390x machine type compatibility for rebase to QEMU 7.0.0 - 5bcf8d874c target/s390x: deprecate CPUs older than z14 -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update - a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type - ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (s390x part) + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) --- - hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 159 +++++++++++++++++++++++++++++++ target/s390x/cpu_models.c | 11 +++ target/s390x/cpu_models.h | 2 + target/s390x/cpu_models_sysemu.c | 2 + - 4 files changed, 158 insertions(+) + 4 files changed, 174 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 503f212a31..dcd3b966b0 100644 +index b1dcb3857f..ff753a29e0 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -826,6 +826,7 @@ bool css_migration_enabled(void) +@@ -859,6 +859,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_8_0_instance_options(MachineState *machine) + static void ccw_machine_9_0_instance_options(MachineState *machine) { } -@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1272,6 +1273,164 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + + ++static void ccw_machine_rhel940_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel940_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true); ++ +static void ccw_machine_rhel920_instance_options(MachineState *machine) +{ ++ ccw_machine_rhel940_instance_options(machine); +} + +static void ccw_machine_rhel920_class_options(MachineClass *mc) +{ ++ ccw_machine_rhel940_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); ++ mc->smp_props.drawers_supported = false; /* from ccw_machine_8_1 */ ++ mc->smp_props.books_supported = false; /* from 
ccw_machine_8_1 */ +} -+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); ++DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", false); + +static void ccw_machine_rhel900_instance_options(MachineState *machine) +{ @@ -204,7 +226,7 @@ index 503f212a31..dcd3b966b0 100644 static void ccw_machine_register_types(void) { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 457b5cb10c..ff6b9463cb 100644 +index 8ed3bb6a27..370b3b3065 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -46,6 +46,9 @@ @@ -217,7 +239,7 @@ index 457b5cb10c..ff6b9463cb 100644 static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -866,22 +869,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -249,23 +271,23 @@ index 457b5cb10c..ff6b9463cb 100644 static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h -index fb1adc8b21..d76745afa9 100644 +index d7b8912989..1a806a97c4 100644 --- a/target/s390x/cpu_models.h +++ b/target/s390x/cpu_models.h -@@ -38,6 +38,8 @@ struct S390CPUDef { +@@ -38,6 +38,8 @@ typedef struct S390CPUDef { S390FeatBitmap full_feat; /* used to init full_feat from generated data */ S390FeatInit full_init; + /* if deprecated, provides a suggestion */ + const char *deprecation_note; - }; + } S390CPUDef; /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index 87a4480c05..28c1b0486c 100644 +index 0728bfcc20..ca2e5d91e2 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c -@@ -60,6 +60,7 @@ static 
void create_cpu_model_list(ObjectClass *klass, void *opaque) +@@ -59,6 +59,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) CpuDefinitionInfo *info; char *name = g_strdup(object_class_get_name(klass)); S390CPUClass *scc = S390_CPU_CLASS(klass); @@ -273,7 +295,7 @@ index 87a4480c05..28c1b0486c 100644 /* strip off the -s390x-cpu */ g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; -@@ -69,6 +70,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) +@@ -68,6 +69,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) info->migration_safe = scc->is_migration_safe; info->q_static = scc->is_static; info->q_typename = g_strdup(object_class_get_name(klass)); @@ -282,5 +304,5 @@ index 87a4480c05..28c1b0486c 100644 if (cpu_list_data->model) { Object *obj; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0010-Add-x86_64-machine-types.patch b/SOURCES/0010-Add-x86_64-machine-types.patch index 9685338..83ee4a3 100644 --- a/SOURCES/0010-Add-x86_64-machine-types.patch +++ b/SOURCES/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001 +From 3c88acb005806ad2386ab6c94a8831151f624738 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -13,9 +13,12 @@ Rebase notes (6.1.0): Rebase notes (7.0.0): - Reset alias for all machine-types except latest one -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - remove legacy_no_rng_seed usage (removed upstream) +Rebase notes (8.1.0): +- default_nic_model to default_nic + Merged patches (6.1.0): - 59c284ad3b x86: Add x86 rhel8.5 machine types - a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default @@ -35,35 +38,61 @@ Merged patches (7.0.0): - dcc64971bf RHEL: mark old machine types as deprecated (partialy) - 6b396f182b RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move 
s3/s4 suspend disabling to compat (only hw/i386/pc.c chunk) - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (x86_64 specific changes) - 35b5c8554f target/i386: deprecate CPUs older than x86_64-v2 ABI -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - f33ca8aed4 x86: rhel 9.2.0 machine type + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (x86_64 part) +- c6eaf73add redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU 8.0.0 update +- 6cbf496e5e hw/acpi: Mark acpi blobs as resizable on RHEL pc machines version 7.6 and above + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. (partial) +- 719e2ac147 Fix x86 machine type compatibility for qemu-kvm 8.1.0 + +Merged patches (9.0.0 rc0): +- 9149e2bc8f x86: rhel 9.2.0 machine type compat fix --- - hw/i386/pc.c | 147 +++++++++++++++++++++- - hw/i386/pc_piix.c | 86 ++++++++++++- - hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++- + hw/i386/fw_cfg.c | 2 +- + hw/i386/pc.c | 159 ++++++++++++++++++++- + hw/i386/pc_piix.c | 109 ++++++++++++++ + hw/i386/pc_q35.c | 285 +++++++++++++++++++++++++++++++++++++ include/hw/boards.h | 2 + - include/hw/i386/pc.h | 27 ++++ - target/i386/cpu.c | 21 ++++ + include/hw/i386/pc.h | 33 +++++ + target/i386/cpu.c | 21 +++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 9 files changed, 538 insertions(+), 7 deletions(-) + 10 files changed, 617 insertions(+), 4 deletions(-) +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index c7aa39a13e..283c3f4c16 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -63,7 +63,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", mc->desc, mc->name, ++ 
smbios_set_defaults("Red Hat", "KVM", mc->desc, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, pcmc->smbios_stream_version); + } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 1489abf010..8abb1f872e 100644 +index 5c21b0c4db..4a154c1a9a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = { +@@ -326,6 +326,161 @@ GlobalProperty pc_compat_2_0[] = { }; - const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0); +/* This macro is for changes to properties that are RHEL specific, + * different to the current upstream and to be applied to the latest @@ -80,13 +109,25 @@ index 1489abf010..8abb1f872e 100644 + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, + { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, -+ /* bz 1508330 */ ++ /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, + /* bz 1941397 */ + { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_3_compat[] = { ++ /* pc_rhel_9_3_compat from pc_compat_8_0 */ ++ { "virtio-mem", "unplugged-inaccessible", "auto" }, ++}; ++const size_t pc_rhel_9_3_compat_len = G_N_ELEMENTS(pc_rhel_9_3_compat); ++ ++GlobalProperty pc_rhel_9_2_compat[] = { ++ /* pc_rhel_9_2_compat from pc_compat_7_2 */ ++ { "ICH9-LPC", "noreboot", "true" }, ++}; ++const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); ++ +GlobalProperty pc_rhel_9_0_compat[] = { + /* pc_rhel_9_0_compat from pc_compat_6_2 */ + { "virtio-mem", "unplugged-inaccessible", "off" }, @@ -177,27 +218,27 @@ index 1489abf010..8abb1f872e 100644 + * machine types irrespective of host. 
+ */ +GlobalProperty pc_rhel_7_6_compat[] = { -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, +}; +const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); @@ -211,15 +252,15 @@ index 1489abf010..8abb1f872e 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - 
pcmc->pvh_enabled = true; - pcmc->kvmclock_create_always = true; +@@ -1813,6 +1968,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->resizable_acpi_blob = true; + x86mc->apic_xrupt_override = true; assert(!mc->get_hotplug_handler); + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1954,7 +2098,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1823,7 +1979,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -230,10 +271,10 @@ index 1489abf010..8abb1f872e 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 14a794081e..3e330fd36f 100644 +index 18ba076609..a647262d63 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -54,6 +54,7 @@ +@@ -52,6 +52,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -241,18 +282,7 @@ index 14a794081e..3e330fd36f 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine, - if (pcmc->smbios_defaults) { - MachineClass *mc = MACHINE_GET_CLASS(machine); - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", -- mc->name, pcmc->smbios_legacy_mode, -+ smbios_set_defaults("Red Hat", "KVM", -+ mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - pcmc->smbios_stream_product, - pcmc->smbios_stream_version, -@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine, +@@ -422,6 +423,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp) * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -260,7 +290,7 @@ index 14a794081e..3e330fd36f 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -951,3 +953,110 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -274,8 +304,9 @@ index 14a794081e..3e330fd36f 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; -+ pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; ++ pcmc->resizable_acpi_blob = true; ++ m->default_nic = "e1000"; + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; @@ -289,13 +320,13 @@ index 14a794081e..3e330fd36f 100644 + +static void pc_init_rhel760(MachineState *machine) +{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); ++ pc_init1(machine, TYPE_I440FX_PCI_DEVICE); +} + +static void pc_machine_rhel760_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ ObjectClass *oc = OBJECT_CLASS(m); + pc_machine_rhel7_options(m); + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; + m->async_pf_vmexit_disable = true; @@ -309,7 +340,33 @@ index 14a794081e..3e330fd36f 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ /* From pc_i440fx_7_0_machine_options() */ + pcmc->enforce_amd_1tb_hole = false; ++ /* From pc_i440fx_8_0_machine_options() */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_i440fx_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; ++ /* Introduced in QEMU 8.2 */ ++ pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; ++ ++ object_class_property_add_enum(oc, "x-south-bridge", "PCSouthBridgeOption", ++ &PCSouthBridgeOption_lookup, ++ pc_get_south_bridge, ++ 
pc_set_south_bridge); ++ object_class_property_set_description(oc, "x-south-bridge", ++ "Use a different south bridge than PIIX3"); ++ ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_4, ++ hw_compat_rhel_9_4_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_3, ++ hw_compat_rhel_9_3_len); ++ compat_props_add(m->compat_props, pc_rhel_9_3_compat, ++ pc_rhel_9_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -345,21 +402,10 @@ index 14a794081e..3e330fd36f 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index dc0ba5f9e7..98601bb76f 100644 +index c7bc8a2041..e872dc7e46 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) - - if (pcmc->smbios_defaults) { - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", -- mc->name, pcmc->smbios_legacy_mode, -+ smbios_set_defaults("Red Hat", "KVM", -+ mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - pcmc->smbios_stream_product, - pcmc->smbios_stream_version, -@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine) +@@ -341,6 +341,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -367,7 +413,7 @@ index dc0ba5f9e7..98601bb76f 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -693,3 +694,287 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, 
"pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -379,8 +425,8 @@ index dc0ba5f9e7..98601bb76f 100644 +static void pc_q35_machine_rhel_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pcmc->default_nic_model = "e1000e"; + pcmc->pci_root_uid = 0; ++ m->default_nic = "e1000e"; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; @@ -394,8 +440,28 @@ index dc0ba5f9e7..98601bb76f 100644 + m->alias = "q35"; + m->max_cpus = 710; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ compat_props_add(m->compat_props, ++ pc_q35_compat_defaults, pc_q35_compat_defaults_len); +} + ++static void pc_q35_init_rhel940(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel940_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.4.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940, ++ pc_q35_machine_rhel940_options); ++ ++ +static void pc_q35_init_rhel920(MachineState *machine) +{ + pc_q35_init(machine); @@ -404,10 +470,27 @@ index dc0ba5f9e7..98601bb76f 100644 +static void pc_q35_machine_rhel920_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel940_options(m); + m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.2.0"; ++ ++ /* From pc_q35_8_0_machine_options() */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_q35_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_4, ++ hw_compat_rhel_9_4_len); ++ compat_props_add(m->compat_props, 
hw_compat_rhel_9_3, ++ hw_compat_rhel_9_3_len); ++ compat_props_add(m->compat_props, pc_rhel_9_3_compat, ++ pc_rhel_9_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, @@ -619,10 +702,10 @@ index dc0ba5f9e7..98601bb76f 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index c5a965d27f..5e7446ee40 100644 +index 0466f9d0f3..46b8725c41 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -268,6 +268,8 @@ struct MachineClass { +@@ -283,6 +283,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -632,16 +715,22 @@ index c5a965d27f..5e7446ee40 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 908a275736..4376f64a47 100644 +index ebd8f973f2..a984c951ad 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len; - - int pc_machine_kvm_type(MachineState *machine, const char *vm_type); +@@ -291,6 +291,39 @@ extern const size_t pc_compat_2_1_len; + extern GlobalProperty pc_compat_2_0[]; + extern const size_t pc_compat_2_0_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_3_compat[]; ++extern const size_t pc_rhel_9_3_compat_len; ++ ++extern GlobalProperty pc_rhel_9_2_compat[]; ++extern const size_t pc_rhel_9_2_compat_len; ++ +extern GlobalProperty pc_rhel_9_0_compat[]; +extern const size_t pc_rhel_9_0_compat_len; + @@ -670,10 +759,10 @@ index 908a275736..4376f64a47 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ 
{ \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6576287e5b..0ef2bf1b93 100644 +index 33760a2ee1..be7b0663cd 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = { +@@ -2190,9 +2190,13 @@ static const CPUCaches epyc_genoa_cache_info = { * PT in VMX operation */ @@ -687,7 +776,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2213,6 +2217,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "phenom", @@ -695,7 +784,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 16, -@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2245,6 +2250,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "core2duo", @@ -703,7 +792,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2287,6 +2293,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm64", @@ -711,7 +800,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2328,6 +2335,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "qemu32", @@ -719,7 +808,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 4, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2342,6 +2350,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm32", @@ -727,7 +816,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2372,6 +2381,7 @@ static 
const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "coreduo", @@ -735,7 +824,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2405,6 +2415,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "486", @@ -743,7 +832,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 4, -@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2417,6 +2428,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium", @@ -751,7 +840,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 5, -@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2429,6 +2441,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium2", @@ -759,7 +848,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 2, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2441,6 +2454,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium3", @@ -767,7 +856,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 3, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2453,6 +2467,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "athlon", @@ -775,7 +864,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 2, .vendor = CPUID_VENDOR_AMD, .family = 6, -@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2468,6 +2483,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "n270", @@ -783,7 +872,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2493,6 +2509,7 @@ 
static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Conroe", @@ -791,7 +880,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2533,6 +2550,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Penryn", @@ -799,7 +888,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4394,6 +4412,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G1", @@ -807,7 +896,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4414,6 +4433,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G2", @@ -815,7 +904,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4436,6 +4456,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G3", @@ -824,10 +913,10 @@ index 6576287e5b..0ef2bf1b93 100644 .vendor = CPUID_VENDOR_AMD, .family = 16, diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c -index 7237378a7d..7b8a3d5af0 100644 +index 9c791b7b05..b91af5051f 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c -@@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = { +@@ -138,6 +138,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -836,10 +925,10 @@ index 7237378a7d..7b8a3d5af0 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index de531842f6..8d82304609 100644 +index e68cbe9293..739f33db47 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3822,6 +3822,7 @@ static 
int kvm_get_msrs(X86CPU *cpu) +@@ -3715,6 +3715,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -847,7 +936,7 @@ index de531842f6..8d82304609 100644 kvm_msr_buf_reset(cpu); -@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4069,6 +4070,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -881,5 +970,5 @@ index 78f1cf8186..ac954c9b06 100644 val = qtest_inb(qts, 0x505); g_assert_cmpuint(val, ==, 3); -- -2.39.1 +2.39.3 diff --git a/SOURCES/0011-Enable-make-check.patch b/SOURCES/0011-Enable-make-check.patch index cc91302..502bc67 100644 --- a/SOURCES/0011-Enable-make-check.patch +++ b/SOURCES/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001 +From 5768cf6811842e5c59da3b752f60659a9d6b5ba1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -24,43 +24,49 @@ Rebase changes (7.0.0): - Remove unnecessary changes in iotest 051 - Remove changes in bios-tables-test.c and prom-env-test.c qtests -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - Disable bcm2835-dma-test (added upstream) -Rebase changes (8.0.0-rc1): +Rebase changes (8.0.0): - Removed chunks for disabling bios-table-test (protected upstream) - -Rebase change (8.0.0-rc2): - Disable new qemu-iotests execution - Revert change in tco qtest (blocking test run) +Rebase changes (8.1.0): +- Do not disable device-plug-test for s390x + +Rebase changes (8.2.0 rc1): +- Remove unneeded hack in qtest/usb-hcd-xhci-test.c + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57 + +Merged patches (8.1.0): +- f468163234 iotests: Use alternative CPU type that is not 
deprecated in RHEL --- .distro/qemu-kvm.spec.template | 4 ++-- tests/avocado/replay_kernel.py | 2 +- tests/avocado/reverse_debugging.py | 2 +- tests/avocado/tcg_plugins.py | 6 ++--- tests/qemu-iotests/meson.build | 34 ++++++++++++++--------------- + tests/qemu-iotests/testenv.py | 3 +++ tests/qtest/fuzz-e1000e-test.c | 2 +- tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/intel-hda-test.c | 2 +- tests/qtest/libqos/meson.build | 2 +- tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 2 -- - tests/qtest/tco-test.c | 2 +- - tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/meson.build | 1 - tests/qtest/virtio-net-failover.c | 1 + - 14 files changed, 35 insertions(+), 32 deletions(-) + 13 files changed, 33 insertions(+), 30 deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py -index f13456e1ec..2fee270a42 100644 +index 10d99403a4..c3422ea1e4 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py -@@ -147,7 +147,7 @@ def test_aarch64_virt(self): +@@ -166,7 +166,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -70,10 +76,10 @@ index f13456e1ec..2fee270a42 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index 680c314cfc..71eccb8fb6 100644 +index 92855a02a5..87822074b6 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py -@@ -206,7 +206,7 @@ def test_aarch64_virt(self): +@@ -230,7 +230,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -83,10 +89,10 @@ index 680c314cfc..71eccb8fb6 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py -index 
642d2e49e3..93b3afd823 100644 +index 15fd87b2c1..f0d9d89c93 100644 --- a/tests/avocado/tcg_plugins.py +++ b/tests/avocado/tcg_plugins.py -@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): +@@ -66,7 +66,7 @@ def test_aarch64_virt_insn(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -95,7 +101,7 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): +@@ -96,7 +96,7 @@ def test_aarch64_virt_insn_icount(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -104,7 +110,7 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): +@@ -126,7 +126,7 @@ def test_aarch64_virt_mem_icount(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -114,7 +120,7 @@ index 642d2e49e3..93b3afd823 100644 kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build -index 9735071a29..32002335f4 100644 +index fad340ad59..3c0d5241f6 100644 --- a/tests/qemu-iotests/meson.build +++ b/tests/qemu-iotests/meson.build @@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats @@ -156,6 +162,20 @@ index 9735071a29..32002335f4 100644 +# suite: suites) +# endforeach endforeach +diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py +index 588f30a4f1..3929a3634f 100644 +--- a/tests/qemu-iotests/testenv.py ++++ b/tests/qemu-iotests/testenv.py +@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, + if self.qemu_prog.endswith(f'qemu-system-{suffix}'): + self.qemu_options += f' -machine {machine}' + ++ if 
self.qemu_prog.endswith('qemu-system-x86_64'): ++ self.qemu_options += ' -cpu Nehalem' ++ + # QEMU_DEFAULT_MACHINE + self.qemu_default_machine = get_default_machine(self.qemu_prog) + diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c index 5052883fb6..b5286f4b12 100644 --- a/tests/qtest/fuzz-e1000e-test.c @@ -183,20 +203,20 @@ index e37b48b2cc..88647da054 100644 qtest_outl(s, 0xcf8, 0x80001811); diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c -index d4a8db6fd6..1a796ec15a 100644 +index 663bb6c485..2efc43e3f7 100644 --- a/tests/qtest/intel-hda-test.c +++ b/tests/qtest/intel-hda-test.c -@@ -38,7 +38,7 @@ static void test_issue542_ich6(void) +@@ -42,7 +42,7 @@ static void test_issue542_ich6(void) { QTestState *s; - s = qtest_init("-nographic -nodefaults -M pc-q35-6.2 " + s = qtest_init("-nographic -nodefaults -M pc-q35-rhel9.0.0 " + AUDIODEV "-device intel-hda,id=" HDA_ID CODEC_DEVICES); - qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index cc209a8de5..42a7c529c9 100644 +index 3aed6efcb8..119613237e 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -44,7 +44,7 @@ libqos_srcs = files( @@ -206,8 +226,8 @@ index cc209a8de5..42a7c529c9 100644 - 'virtio-iommu.c', +# 'virtio-iommu.c', 'virtio-gpio.c', + 'virtio-scmi.c', 'generic-pcihost.c', - diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c index 8ac95b89f7..cd2102555c 100644 --- a/tests/qtest/lpc-ich9-test.c @@ -222,10 +242,10 @@ index 8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 85ea4e8d99..893afc8eeb 100644 +index 36c5c13a7b..a2887d6057 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -94,7 +94,6 @@ qtests_i386 = \ +@@ -101,7 +101,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -233,62 +253,11 @@ index 
85ea4e8d99..893afc8eeb 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -223,7 +222,6 @@ qtests_s390x = \ - (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ - ['boot-serial-test', - 'drive_del-test', -- 'device-plug-test', - 'virtio-ccw-test', - 'cpu-plug-test', - 'migration-test'] -diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c -index 0547d41173..3756ce82d8 100644 ---- a/tests/qtest/tco-test.c -+++ b/tests/qtest/tco-test.c -@@ -60,7 +60,7 @@ static void test_init(TestData *d) - QTestState *qs; - - qs = qtest_initf("-machine q35 %s %s", -- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "", -+ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false", - !d->args ? "" : d->args); - qtest_irq_intercept_in(qs, "ioapic"); - -diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c -index 10ef9d2a91..3855873050 100644 ---- a/tests/qtest/usb-hcd-xhci-test.c -+++ b/tests/qtest/usb-hcd-xhci-test.c -@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) - usb_test_hotplug(global_qtest, "xhci", "1", NULL); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void test_usb_uas_hotplug(void) - { - QTestState *qts = global_qtest; -@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) - qtest_qmp_device_del(qts, "scsihd"); - qtest_qmp_device_del(qts, "uas"); - } -+#endif - - static void test_usb_ccid_hotplug(void) - { -@@ -56,7 +58,9 @@ int main(int argc, char **argv) - - qtest_add_func("/xhci/pci/init", test_xhci_init); - qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); -+#endif - qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); - - qtest_start("-device nec-usb-xhci,id=xhci" diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c -index 4a809590bf..1bf3fa641c 100644 +index 73dfabc272..a9dd304781 100644 --- 
a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c -@@ -25,6 +25,7 @@ +@@ -26,6 +26,7 @@ #define PCI_SEL_BASE 0x0010 #define BASE_MACHINE "-M q35 -nodefaults " \ @@ -297,5 +266,5 @@ index 4a809590bf..1bf3fa641c 100644 "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " -- -2.39.1 +2.39.3 diff --git a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 430959b..e8bf13a 100644 --- a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001 +From e06a905d726fc20ea6bd95dff1bd0ffe97ebb202 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -26,16 +26,19 @@ Count of slots increased to 509 later so we could increase limit to 64 as some usecases require more than 32 devices. 
Signed-off-by: Bandan Das + +Rebase changes (8.2.0): +- Update to upstream changes --- - hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.c | 31 ++++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + - 2 files changed, 29 insertions(+), 1 deletion(-) + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index ec9a854361..a779053be3 100644 +index 64780d1b79..57ac63c10c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -48,6 +48,9 @@ +@@ -50,6 +50,9 @@ /* Protected by BQL */ static KVMRouteChange vfio_route_change; @@ -45,13 +48,19 @@ index ec9a854361..a779053be3 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); -@@ -2854,9 +2857,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - ssize_t len; - struct stat st; - int groupid; +@@ -2946,13 +2949,36 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ERRP_GUARD(); + VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; ++ VFIODevice *vbasedev_iter; ++ VFIOGroup *group; + char *tmp, *subsys; + Error *err = NULL; - int i, ret; + int ret, i = 0; bool is_mdev; + char uuid[UUID_STR_LEN]; + char *name; + if (device_limit && device_limit != vdev->assigned_device_limit) { + error_setg(errp, "Assigned device limit has been redefined. 
" @@ -74,10 +83,10 @@ index ec9a854361..a779053be3 100644 + return; + } + - if (!vbasedev->sysfsdev) { + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3370,6 +3396,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -88,10 +97,10 @@ index ec9a854361..a779053be3 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 177abcc8fb..45235d38ba 100644 +index 6e64a2654e..b7de39c010 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h -@@ -140,6 +140,7 @@ struct VFIOPCIDevice { +@@ -142,6 +142,7 @@ struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); @@ -100,5 +109,5 @@ index 177abcc8fb..45235d38ba 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0013-Add-support-statement-to-help-output.patch b/SOURCES/0013-Add-support-statement-to-help-output.patch index 25db0b8..0644440 100644 --- a/SOURCES/0013-Add-support-statement-to-help-output.patch +++ b/SOURCES/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001 +From b467dc6a24ef41fa574260429807711f6802a54d Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -17,14 +17,14 @@ as unsupported by Red Hat, and advising users to use libvirt instead. 
Signed-off-by: Eduardo Habkost --- - softmmu/vl.c | 9 +++++++++ + system/vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) -diff --git a/softmmu/vl.c b/softmmu/vl.c -index ea20b23e4c..ad4173138d 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -834,9 +834,17 @@ static void version(void) +diff --git a/system/vl.c b/system/vl.c +index c644222982..03c3b0aa94 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -869,9 +869,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index ea20b23e4c..ad4173138d 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", g_get_prgname()); -@@ -862,6 +870,7 @@ static void help(int exitcode) +@@ -897,6 +905,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -51,5 +51,5 @@ index ea20b23e4c..ad4173138d 100644 } -- -2.39.1 +2.39.3 diff --git a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index b97c844..04adb4a 100644 --- a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001 +From 20cc3a6d9bce3e40d165f865b5e398c300cae7bf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 59bdf67a2c..52b49f1f6a 100644 +index 8ce85d4559..4fc27ee2e2 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3296,11 +3296,11 @@ SRST +@@ -3493,11 +3493,11 @@ SRST :: @@ -57,5 +57,5 @@ index 59bdf67a2c..52b49f1f6a 100644 ``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. 
-- -2.39.1 +2.39.3 diff --git a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 1e2f8e1..8518918 100644 --- a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001 +From 2f9fdd21ecf2810d0d83a8125ce0cc1e75dbb13a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 30fd53fa64..22084730f9 100644 +index 956128b409..0e8b2f7518 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1337,6 +1337,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, +@@ -1358,6 +1358,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOTSUP; goto fail; } @@ -61,7 +61,7 @@ index 30fd53fa64..22084730f9 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index 6b32c7fbfa..6ddda2ee64 100644 +index 2846c83808..83472953a2 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -83,6 +83,7 @@ _filter_qemu() @@ -73,5 +73,5 @@ index 6b32c7fbfa..6ddda2ee64 100644 } -- -2.39.1 +2.39.3 diff --git a/SOURCES/0016-Add-upstream-compatibility-bits.patch b/SOURCES/0016-Add-upstream-compatibility-bits.patch new file mode 100644 index 0000000..3efa22c --- /dev/null +++ b/SOURCES/0016-Add-upstream-compatibility-bits.patch @@ -0,0 +1,121 @@ +From 59470e8ab849f22b407f55292e540e16a8cad01a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 20 Mar 2024 05:34:32 -0400 +Subject: Add upstream compatibility bits + +Adding new compats structure for changes introduced during rebase 
to QEMU 9.0.0. + +Signed-off-by: Miroslav Rezanina + +--- + +Rebase notes (9.0.0 rc2): +- Add aw-bits setting for aarch compat record (overwritten for 9.4 and older) +--- + hw/arm/virt.c | 3 +++ + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 3 ++- + hw/i386/pc_q35.c | 3 +++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 6 files changed, 22 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 22bc345137..f1af9495c6 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -144,6 +144,8 @@ GlobalProperty arm_rhel_compat[] = { + {"virtio-net-pci", "romfile", "" }, + {"virtio-net-pci-transitional", "romfile", "" }, + {"virtio-net-pci-non-transitional", "romfile", "" }, ++ /* arm_rhel_compat from arm_virt_compat, added for 9.0.0 rebase */ ++ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" }, + }; + const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + +@@ -3728,6 +3730,7 @@ type_init(rhel_machine_init); + + static void rhel940_virt_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 695cb89a46..0f256d9633 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -302,6 +302,16 @@ const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1); + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_5[] = { ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { "migration", "zero-page-detection", "legacy"}, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, ++}; ++const size_t hw_compat_rhel_9_5_len = G_N_ELEMENTS(hw_compat_rhel_9_5); ++ + GlobalProperty hw_compat_rhel_9_4[] = { + /* hw_compat_rhel_9_4 from hw_compat_8_0 */ + { 
TYPE_VIRTIO_NET, "host_uso", "off"}, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index a647262d63..6b260682eb 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1015,7 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); + +- ++ compat_props_add(m->compat_props, hw_compat_rhel_9_5, ++ hw_compat_rhel_9_5_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_4, + hw_compat_rhel_9_4_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_3, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index e872dc7e46..2b54944c0f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -733,6 +733,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_5, ++ hw_compat_rhel_9_5_len); + } + + DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index ff753a29e0..9ad54682c6 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1282,6 +1282,7 @@ static void ccw_machine_rhel940_instance_options(MachineState *machine) + + static void ccw_machine_rhel940_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len); + } + DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 46b8725c41..cca62f906b 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -514,6 +514,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_5[]; ++extern const size_t hw_compat_rhel_9_5_len; ++ + extern GlobalProperty 
hw_compat_rhel_9_4[]; + extern const size_t hw_compat_rhel_9_4_len; + +-- +2.39.3 + diff --git a/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch b/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch deleted file mode 100644 index bb9455a..0000000 --- a/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001 -From: Kfir Manor -Date: Sun, 22 Jan 2023 17:33:07 +0200 -Subject: qga/linux: add usb support to guest-get-fsinfo - -RH-Author: Kostiantyn Kostiuk -RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo -RH-Bugzilla: 2149191 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: yvugenfi -RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 (kostyanf14/redhat_centos-stream_src_qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191 -Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/ - -Signed-off-by: Kfir Manor -Reviewed-by: Konstantin Kostiuk -Signed-off-by: Konstantin Kostiuk - -Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -Patch-id: 72 -Patch-present-in-specfile: True ---- - qga/commands-posix.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 079689d79a..97754930c1 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - g_str_equal(driver, "sym53c8xx") || - g_str_equal(driver, "virtio-pci") || - g_str_equal(driver, "ahci") || -- g_str_equal(driver, "nvme"))) { -+ g_str_equal(driver, "nvme") || -+ g_str_equal(driver, "xhci_hcd") || -+ g_str_equal(driver, "ehci-pci"))) { - break; - } - -@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - } - } else if (strcmp(driver, "nvme") == 0) { - disk->bus_type = GUEST_DISK_BUS_TYPE_NVME; -+ } else 
if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) { -+ disk->bus_type = GUEST_DISK_BUS_TYPE_USB; - } else { - g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath); - goto cleanup; --- -2.39.1 - diff --git a/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch b/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch deleted file mode 100644 index ce0ba5c..0000000 --- a/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch +++ /dev/null @@ -1,110 +0,0 @@ -From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 15 Feb 2023 02:03:17 -0500 -Subject: Add RHEL 9.2.0 compat structure - -Adding compatibility bits necessary to keep 9.2.0 machine -types same after rebase to 8.0. - -Signed-off-by: Miroslav Rezanina - -Rebase notes (8.0.0 rc4): -- Added migration.x-preempt-pre-7-2 compat) ---- - hw/arm/virt.c | 1 + - hw/core/machine.c | 10 ++++++++++ - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 3 +++ - hw/s390x/s390-virtio-ccw.c | 1 + - include/hw/boards.h | 3 +++ - 6 files changed, 20 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 1ae1654be5..9be53e9355 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init); - static void rhel920_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); - } - DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5aa567fad3..0e0120b7f2 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); - const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - -+GlobalProperty hw_compat_rhel_9_2[] = { -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "e1000e", "migrate-timadj", "off" }, -+ /* 
hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "virtio-mem", "x-early-migration", "false" }, -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "migration", "x-preempt-pre-7-2", "true" }, -+}; -+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); -+ - /* - * Mostly the same as hw_compat_7_0 - */ -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 3e330fd36f..90fb6e2e03 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; - pcmc->enforce_amd_1tb_hole = false; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_2, -+ hw_compat_rhel_9_2_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 98601bb76f..8945b69175 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) - m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.2.0"; -+ -+ compat_props_add(m->compat_props, hw_compat_rhel_9_2, -+ hw_compat_rhel_9_2_len); - } - - DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index dcd3b966b0..6a0b93c63d 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine) - - static void ccw_machine_rhel920_class_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); - } - DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 5e7446ee40..5f08bd7550 100644 ---- a/include/hw/boards.h -+++ 
b/include/hw/boards.h -@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_9_2[]; -+extern const size_t hw_compat_rhel_9_2_len; -+ - extern GlobalProperty hw_compat_rhel_9_1[]; - extern const size_t hw_compat_rhel_9_1_len; - --- -2.39.1 - diff --git a/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch b/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch new file mode 100644 index 0000000..5befe68 --- /dev/null +++ b/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch @@ -0,0 +1,30 @@ +From ba574acacf679850e337ec2d5e7836b8277cf393 Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Thu, 18 Apr 2024 15:04:28 +0200 +Subject: x86: rhel 9.4.0 machine type compat fix + +Fix up the compatibility for 9.4.0. Ensure that pc-q35-rhel9.4.0 +still uses SMBIOS 3.X by default. + +Signed-off-by: Sebastian Ott +--- + hw/i386/pc_q35.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2b54944c0f..2f11f9af7d 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,6 +734,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + ++ /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; ++ + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +-- +2.39.3 + diff --git a/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch b/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch deleted file mode 100644 index 81993e9..0000000 --- a/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch +++ /dev/null @@ -1,76 +0,0 @@ -From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 27 Mar 2023 15:14:03 
+0200 -Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU - 8.0.0 update - -Add pc_rhel_9_2_compat based on upstream pc_compat_7_2. - -Signed-off-by: Thomas Huth ---- - hw/i386/pc.c | 6 ++++++ - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 2 ++ - include/hw/i386/pc.h | 3 +++ - 4 files changed, 13 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 8abb1f872e..f216922cee 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_9_2_compat[] = { -+ /* pc_rhel_9_2_compat from pc_compat_7_2 */ -+ { "ICH9-LPC", "noreboot", "true" }, -+}; -+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); -+ - GlobalProperty pc_rhel_9_0_compat[] = { - /* pc_rhel_9_0_compat from pc_compat_6_2 */ - { "virtio-mem", "unplugged-inaccessible", "off" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 90fb6e2e03..fc704d783f 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_2, - hw_compat_rhel_9_2_len); -+ compat_props_add(m->compat_props, pc_rhel_9_2_compat, -+ pc_rhel_9_2_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 8945b69175..e97655616a 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) - - compat_props_add(m->compat_props, hw_compat_rhel_9_2, - hw_compat_rhel_9_2_len); -+ compat_props_add(m->compat_props, pc_rhel_9_2_compat, -+ pc_rhel_9_2_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, -diff --git 
a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 4376f64a47..d218ad1628 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type); - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_9_2_compat[]; -+extern const size_t pc_rhel_9_2_compat_len; -+ - extern GlobalProperty pc_rhel_9_0_compat[]; - extern const size_t pc_rhel_9_0_compat_len; - --- -2.39.1 - diff --git a/SOURCES/0019-Disable-unwanted-new-devices.patch b/SOURCES/0019-Disable-unwanted-new-devices.patch deleted file mode 100644 index f656ca9..0000000 --- a/SOURCES/0019-Disable-unwanted-new-devices.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 17 Apr 2023 01:24:18 -0400 -Subject: Disable unwanted new devices - -QEMU 8.0 adds two new device we do not want to support that can't -be disabled using configure switch. - -1) ide-cf - virtual CompactFlash card - -2) i2c-echo - testing echo device - -Use manual disabling of the device by changing code (1) and meson configs (2). 
- -Signed-off-by: Miroslav Rezanina ---- - hw/ide/qdev.c | 9 +++++++++ - hw/misc/meson.build | 3 ++- - 2 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c -index 1b3b4da01d..454bfa5783 100644 ---- a/hw/ide/qdev.c -+++ b/hw/ide/qdev.c -@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) - ide_dev_initfn(dev, IDE_CD, errp); - } - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static void ide_cf_realize(IDEDevice *dev, Error **errp) - { - ide_dev_initfn(dev, IDE_CFATA, errp); - } -+#endif - - #define DEFINE_IDE_DEV_PROPERTIES() \ - DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ -@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { - .class_init = ide_cd_class_init, - }; - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static Property ide_cf_properties[] = { - DEFINE_IDE_DEV_PROPERTIES(), - DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), -@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { - .instance_size = sizeof(IDEDrive), - .class_init = ide_cf_class_init, - }; -+#endif - - static void ide_device_class_init(ObjectClass *klass, void *data) - { -@@ -396,7 +402,10 @@ static void ide_register_types(void) - type_register_static(&ide_bus_info); - type_register_static(&ide_hd_info); - type_register_static(&ide_cd_info); -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - type_register_static(&ide_cf_info); -+#endif - type_register_static(&ide_device_type_info); - } - -diff --git a/hw/misc/meson.build b/hw/misc/meson.build -index a40245ad44..9cc5a61ed7 100644 ---- a/hw/misc/meson.build -+++ b/hw/misc/meson.build -@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c')) - - softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c')) - --softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) -+# Disabled for Red Hat Enterprise Linux -+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) - - 
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c')) - --- -2.39.1 - diff --git a/SOURCES/README.tests b/SOURCES/README.tests index 9932773..739e2c6 100644 --- a/SOURCES/README.tests +++ b/SOURCES/README.tests @@ -28,7 +28,7 @@ avocado_qemu tests: The avocado_qemu tests can be executed by running the following avocado command: avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/ Avocado needs to be installed separately using either pip or from source as -Avocado is not being packaged for RHEL-8. +Avocado is not being packaged for RHEL. qemu-iotests: symlinks to corresponding binaries need to be created for QEMU_PROG, @@ -36,4 +36,4 @@ QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be executed. The primary purpose of this package is to make these tests available to be -executed as gating tests for the virt module in the RHEL-8 OSCI environment. +executed as gating tests for the qemu-kvm in the RHEL OSCI environment. diff --git a/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch b/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch new file mode 100644 index 0000000..65da2cc --- /dev/null +++ b/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch @@ -0,0 +1,139 @@ +From 93ea86ac8849ad9ca365b1646313dde9a34ba59c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:03 -0500 +Subject: [PATCH 031/100] HostMem: Add mechanism to opt in kvm guest memfd via + MachineState + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [31/91] 43ce32aef954479cdb736301d1adcb919602c321 (bonzini/rhel-qemu-kvm) + +Add a new member "guest_memfd" to memory backends. When it's set +to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm +guest_memfd will be allocated during RAMBlock allocation. 
+ +Memory backend's @guest_memfd is wired with @require_guest_memfd +field of MachineState. It avoid looking up the machine in phymem.c. + +MachineState::require_guest_memfd is supposed to be set by any VMs +that requires KVM guest memfd as private memory, e.g., TDX VM. + +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Message-ID: <20240320083945.991426-8-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c) +Signed-off-by: Paolo Bonzini +--- + backends/hostmem-file.c | 1 + + backends/hostmem-memfd.c | 1 + + backends/hostmem-ram.c | 1 + + backends/hostmem.c | 1 + + hw/core/machine.c | 5 +++++ + include/hw/boards.h | 2 ++ + include/sysemu/hostmem.h | 1 + + 7 files changed, 12 insertions(+) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index ac3e433cbd..3c69db7946 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + ram_flags |= fb->readonly ? RAM_READONLY_FD : 0; + ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + ram_flags |= fb->is_pmem ? RAM_PMEM : 0; + ram_flags |= RAM_NAMED_FILE; + return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, +diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c +index 3923ea9364..745ead0034 100644 +--- a/backends/hostmem-memfd.c ++++ b/backends/hostmem-memfd.c +@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + name = host_memory_backend_get_name(backend); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? 
RAM_GUEST_MEMFD : 0; + return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, + backend->size, ram_flags, fd, 0, errp); + } +diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c +index d121249f0f..f7d81af783 100644 +--- a/backends/hostmem-ram.c ++++ b/backends/hostmem-ram.c +@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + name = host_memory_backend_get_name(backend); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), + name, backend->size, + ram_flags, errp); +diff --git a/backends/hostmem.c b/backends/hostmem.c +index 81a72ce40b..eb9682b4a8 100644 +--- a/backends/hostmem.c ++++ b/backends/hostmem.c +@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj) + /* TODO: convert access to globals to compat properties */ + backend->merge = machine_mem_merge(machine); + backend->dump = machine_dump_guest_core(machine); ++ backend->guest_memfd = machine_require_guest_memfd(machine); + backend->reserve = true; + backend->prealloc_threads = machine->smp.cpus; + } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 92609aae27..07b994e136 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1480,6 +1480,11 @@ bool machine_mem_merge(MachineState *machine) + return machine->mem_merge; + } + ++bool machine_require_guest_memfd(MachineState *machine) ++{ ++ return machine->require_guest_memfd; ++} ++ + static char *cpu_slot_to_string(const CPUArchId *cpu) + { + GString *s = g_string_new(NULL); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index cca62f906b..815a1c4b26 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine); + int machine_phandle_start(MachineState *machine); + bool machine_dump_guest_core(MachineState *machine); 
+ bool machine_mem_merge(MachineState *machine); ++bool machine_require_guest_memfd(MachineState *machine); + HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); + void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, +@@ -372,6 +373,7 @@ struct MachineState { + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; ++ bool require_guest_memfd; + bool usb; + bool usb_disabled; + char *firmware; +diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h +index 0e411aaa29..04b884bf42 100644 +--- a/include/sysemu/hostmem.h ++++ b/include/sysemu/hostmem.h +@@ -74,6 +74,7 @@ struct HostMemoryBackend { + uint64_t size; + bool merge, dump, use_canonical_path; + bool prealloc, is_mapped, share, reserve; ++ bool guest_memfd; + uint32_t prealloc_threads; + ThreadContext *prealloc_context; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); +-- +2.39.3 + diff --git a/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch b/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch new file mode 100644 index 0000000..aaedcf4 --- /dev/null +++ b/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch @@ -0,0 +1,203 @@ +From c46ac3db0a4db60e667edeabc9ed451c6e8e0ccf Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:33 -0400 +Subject: [PATCH 020/100] KVM: remove kvm_arch_cpu_check_are_resettable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [20/91] d7745bd1a0ed1b215847f150f4a1bb2e912beabc (bonzini/rhel-qemu-kvm) + +Board reset requires writing a fresh CPU state. As far as KVM is +concerned, the only thing that blocks reset is that CPU state is +encrypted; therefore, kvm_cpus_are_resettable() can simply check +if that is the case. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit a99c0c66ebe7d8db3af6f16689ade9375247e43e) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-accel-ops.c | 2 +- + accel/kvm/kvm-all.c | 5 ----- + include/sysemu/kvm.h | 10 ---------- + target/arm/kvm.c | 5 ----- + target/i386/kvm/kvm.c | 5 ----- + target/loongarch/kvm/kvm.c | 5 ----- + target/mips/kvm.c | 5 ----- + target/ppc/kvm.c | 5 ----- + target/riscv/kvm/kvm-cpu.c | 5 ----- + target/s390x/kvm/kvm.c | 5 ----- + 10 files changed, 1 insertion(+), 51 deletions(-) + +diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c +index b3c946dc4b..74e3c5785b 100644 +--- a/accel/kvm/kvm-accel-ops.c ++++ b/accel/kvm/kvm-accel-ops.c +@@ -82,7 +82,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu) + + static bool kvm_cpus_are_resettable(void) + { +- return !kvm_enabled() || kvm_cpu_check_are_resettable(); ++ return !kvm_enabled() || !kvm_state->guest_state_protected; + } + + #ifdef KVM_CAP_SET_GUEST_DEBUG +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ec0f6df7c5..b51e09a583 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2696,11 +2696,6 @@ void kvm_flush_coalesced_mmio_buffer(void) + s->coalesced_flush_in_progress = false; + } + +-bool kvm_cpu_check_are_resettable(void) +-{ +- return kvm_arch_cpu_check_are_resettable(); +-} +- + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { + if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 302e8f6f1e..54f4d83a37 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); + /* Notify resamplefd for EOI of specific interrupts. 
*/ + void kvm_resample_fd_notify(int gsi); + +-/** +- * kvm_cpu_check_are_resettable - return whether CPUs can be reset +- * +- * Returns: true: CPUs are resettable +- * false: CPUs are not resettable +- */ +-bool kvm_cpu_check_are_resettable(void); +- +-bool kvm_arch_cpu_check_are_resettable(void); +- + bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index ab85d628a8..21ebbf3b8f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) + return (data - 32) & 0xffff; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index e271652620..a12207a8ee 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -5623,11 +5623,6 @@ bool kvm_has_waitpkg(void) + return has_msr_umwait; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return !sev_es_enabled(); +-} +- + #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + + void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index d630cc39cb..8224d94333 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) + return true; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +diff --git a/target/mips/kvm.c b/target/mips/kvm.c +index 6c52e59f55..a631ab544f 100644 +--- a/target/mips/kvm.c ++++ b/target/mips/kvm.c +@@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine) + return -1; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + void 
kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index 59f640cf7b..9d9d9f0d79 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -2968,11 +2968,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) + } + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + void kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c +index 6a6c6cae80..49d2f3ad58 100644 +--- a/target/riscv/kvm/kvm-cpu.c ++++ b/target/riscv/kvm/kvm-cpu.c +@@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) + } + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + static int aia_mode; + + static const char *kvm_aia_mode_str(uint64_t mode) +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 55fb4855b1..4db59658e1 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2630,11 +2630,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) + kvm_s390_vcpu_interrupt(cpu, &irq); + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + int kvm_s390_get_zpci_op(void) + { + return cap_zpci_op; +-- +2.39.3 + diff --git a/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch b/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch new file mode 100644 index 0000000..7cdab60 --- /dev/null +++ b/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch @@ -0,0 +1,127 @@ +From 50399796da938c4ea7c69058fde84695bce9d794 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:10 -0400 +Subject: [PATCH 019/100] KVM: track whether guest state is encrypted +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov 
+RH-Commit: [19/91] 685b9c54d43d0043d15c33d13afc3a420cbe139b (bonzini/rhel-qemu-kvm) + +So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the +guest state is encrypted, in which case they do nothing. For the new +API using VM types, instead, the ioctls will fail which is a safer and +more robust approach. + +The new API will be the only one available for SEV-SNP and TDX, but it +is also usable for SEV and SEV-ES. In preparation for that, require +architecture-specific KVM code to communicate the point at which guest +state is protected (which must be after kvm_cpu_synchronize_post_init(), +though that might change in the future in order to support migration). +From that point, skip reading registers so that cpu->vcpu_dirty is +never true: if it ever becomes true, kvm_arch_put_registers() will +fail miserably. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5c3131c392f84c660033d511ec39872d8beb4b1e) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 17 ++++++++++++++--- + include/sysemu/kvm.h | 2 ++ + include/sysemu/kvm_int.h | 1 + + target/i386/sev.c | 1 + + 4 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 931f74256e..ec0f6df7c5 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2703,7 +2703,7 @@ bool kvm_cpu_check_are_resettable(void) + + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + int ret = kvm_arch_get_registers(cpu); + if (ret) { + error_report("Failed to get registers: %s", strerror(-ret)); +@@ -2717,7 +2717,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_state(CPUState *cpu) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + run_on_cpu(cpu, do_kvm_cpu_synchronize_state, 
RUN_ON_CPU_NULL); + } + } +@@ -2752,7 +2752,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_post_init(CPUState *cpu) + { +- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ if (!kvm_state->guest_state_protected) { ++ /* ++ * This runs before the machine_init_done notifiers, and is the last ++ * opportunity to synchronize the state of confidential guests. ++ */ ++ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ } + } + + static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) +@@ -4099,3 +4105,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) + query_stats_schema_vcpu(first_cpu, &stats_args); + } + } ++ ++void kvm_mark_guest_state_protected(void) ++{ ++ kvm_state->guest_state_protected = true; ++} +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index fad9a7e8ff..302e8f6f1e 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); + ++void kvm_mark_guest_state_protected(void); ++ + /** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 882e37e12c..3496be7997 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -87,6 +87,7 @@ struct KVMState + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; ++ bool guest_state_protected; + uint64_t manual_dirty_log_protect; + /* The man page (and posix) say ioctl numbers are signed int, but + * they're not. 
Linux, glibc and *BSD all treat ioctl numbers as +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b8f79d34d1..c49a8fd55e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -755,6 +755,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + if (ret) { + exit(1); + } ++ kvm_mark_guest_state_protected(); + } + + /* query the measurement blob length */ +-- +2.39.3 + diff --git a/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch b/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch new file mode 100644 index 0000000..8e47872 --- /dev/null +++ b/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch @@ -0,0 +1,329 @@ +From f4b01d645926faab2cab86fadb7398c26d6b8285 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:02 -0500 +Subject: [PATCH 028/100] RAMBlock: Add support of KVM private guest memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [28/91] 95fdf196afcb67113834c20fa354ee1397411bfd (bonzini/rhel-qemu-kvm) + +Add KVM guest_memfd support to RAMBlock so both normal hva based memory +and kvm guest memfd based private memory can be associated in one RAMBlock. + +Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to +create private guest_memfd during RAMBlock setup. + +Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd +is more flexible and extensible than simply relying on the VM type because +in the future we may have the case that not all the memory of a VM needs +guest memfd. As a benefit, it also avoids getting MachineState in memory +subsystem. + +Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of +confidential guests, such as TDX VM. How and when to set it for memory +backends will be implemented in the following patches. 
+ +Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has +KVM guest_memfd allocated. + +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Message-ID: <20240320083945.991426-7-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++ + accel/stubs/kvm-stub.c | 5 +++++ + include/exec/memory.h | 20 +++++++++++++++++--- + include/exec/ram_addr.h | 2 +- + include/exec/ramblock.h | 1 + + include/sysemu/kvm.h | 2 ++ + system/memory.c | 5 +++++ + system/physmem.c | 34 +++++++++++++++++++++++++++++++--- + 8 files changed, 90 insertions(+), 7 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 272e945f52..a7b9a127dd 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -92,6 +92,7 @@ static bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; + static uint64_t kvm_supported_memory_attributes; ++static bool kvm_guest_memfd_supported; + static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { +@@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms) + } + + kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); ++ kvm_guest_memfd_supported = ++ kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && ++ kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && ++ (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); ++ + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); + s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + +@@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void) + { + kvm_state->guest_state_protected = true; + } ++ ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) ++{ ++ int fd; ++ struct kvm_create_guest_memfd guest_memfd = { ++ .size = size, ++ .flags = flags, ++ }; ++ ++ 
if (!kvm_guest_memfd_supported) { ++ error_setg(errp, "KVM does not support guest_memfd"); ++ return -1; ++ } ++ ++ fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); ++ if (fd < 0) { ++ error_setg_errno(errp, errno, "Error creating KVM guest_memfd"); ++ return -1; ++ } ++ ++ return fd; ++} +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index ca38172884..8e0eb22e61 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void) + { + return false; + } ++ ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) ++{ ++ return -ENOSYS; ++} +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 8626a355b3..679a847685 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent { + /* RAM FD is opened read-only */ + #define RAM_READONLY_FD (1 << 11) + ++/* RAM can be private that has kvm guest memfd backend */ ++#define RAM_GUEST_MEMFD (1 << 12) ++ + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, + IOMMUNotifierFlag flags, + hwaddr start, hwaddr end, +@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr, + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. +- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. ++ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, ++ * RAM_GUEST_MEMFD. + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function does not do anything to cause the data in the +@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, + * (getpagesize()) will be used. + * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @path: the path in which to allocate the RAM. + * @offset: offset within the file referenced by path + * @errp: pointer to Error*, to store an error if it happens. +@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, + * @size: size of the region. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @fd: the fd to mmap. + * @offset: offset within the file referenced by fd + * @errp: pointer to Error*, to store an error if it happens. +@@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) + */ + bool memory_region_is_protected(MemoryRegion *mr); + ++/** ++ * memory_region_has_guest_memfd: check whether a memory region has guest_memfd ++ * associated ++ * ++ * Returns %true if a memory region's ram_block has valid guest_memfd assigned. ++ * ++ * @mr: the memory region being queried ++ */ ++bool memory_region_has_guest_memfd(MemoryRegion *mr); ++ + /** + * memory_region_get_iommu: check whether a memory region is an iommu + * +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index de45ba7bc9..07c8f86375 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -110,7 +110,7 @@ long qemu_maxrampagesize(void); + * @mr: the memory region where the ram block is + * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @mem_path or @fd: specify the backing file or device + * @offset: Offset into target file + * @errp: pointer to Error*, to store an error if it happens +diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h +index 848915ea5b..459c8917de 100644 +--- a/include/exec/ramblock.h ++++ b/include/exec/ramblock.h +@@ -41,6 +41,7 @@ struct RAMBlock { + QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; + int fd; + uint64_t fd_offset; ++ int guest_memfd; + size_t page_size; + /* dirty bitmap used during migration */ + unsigned long *bmap; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index f114ff6986..9e4ab7ae89 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void); + */ + bool kvm_hwpoisoned_mem(void); + ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); ++ + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); + int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + +diff --git a/system/memory.c b/system/memory.c +index a229a79988..c756950c0c 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr) + return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); + } + ++bool memory_region_has_guest_memfd(MemoryRegion *mr) ++{ ++ return mr->ram_block && mr->ram_block->guest_memfd >= 0; ++} ++ + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +diff --git a/system/physmem.c b/system/physmem.c +index a4fe3d2bf8..f5dfa20e57 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + const bool shared = qemu_ram_is_shared(new_block); + RAMBlock *block; + RAMBlock *last_block = 
NULL; ++ bool free_on_error = false; + ram_addr_t old_ram_size, new_ram_size; + Error *err = NULL; + +@@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + return; + } + memory_try_enable_merging(new_block->host, new_block->max_length); ++ free_on_error = true; ++ } ++ } ++ ++ if (new_block->flags & RAM_GUEST_MEMFD) { ++ assert(kvm_enabled()); ++ assert(new_block->guest_memfd < 0); ++ ++ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, ++ 0, errp); ++ if (new_block->guest_memfd < 0) { ++ qemu_mutex_unlock_ramlist(); ++ goto out_free; + } + } + +@@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + ram_block_notify_add(new_block->host, new_block->used_length, + new_block->max_length); + } ++ return; ++ ++out_free: ++ if (free_on_error) { ++ qemu_anon_ram_free(new_block->host, new_block->max_length); ++ new_block->host = NULL; ++ } + } + + #ifdef CONFIG_POSIX +@@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + /* Just support these ram flags by now. 
*/ + assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | + RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | +- RAM_READONLY_FD)) == 0); ++ RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); + + if (xen_enabled()) { + error_setg(errp, "-mem-path not supported with Xen"); +@@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + new_block->used_length = size; + new_block->max_length = size; + new_block->flags = ram_flags; ++ new_block->guest_memfd = -1; + new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, + errp); + if (!new_block->host) { +@@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + int align; + + assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | +- RAM_NORESERVE)) == 0); ++ RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + assert(!host ^ (ram_flags & RAM_PREALLOC)); + + align = qemu_real_host_page_size(); +@@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + new_block->max_length = max_size; + assert(max_size >= size); + new_block->fd = -1; ++ new_block->guest_memfd = -1; + new_block->page_size = qemu_real_host_page_size(); + new_block->host = host; + new_block->flags = ram_flags; +@@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, + MemoryRegion *mr, Error **errp) + { +- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); ++ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); + } + +@@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block) + } else { + qemu_anon_ram_free(block->host, block->max_length); + } ++ ++ if (block->guest_memfd >= 0) { ++ close(block->guest_memfd); ++ } ++ + g_free(block); + } + +-- +2.39.3 + diff --git 
a/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch b/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch new file mode 100644 index 0000000..04a5fbf --- /dev/null +++ b/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch @@ -0,0 +1,82 @@ +From bd289293604d6f33e9fb89196f0b19117ce81f89 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 20 Mar 2024 17:45:29 +0100 +Subject: [PATCH 032/100] RAMBlock: make guest_memfd require uncoordinated + discard + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [32/91] 0c005849026c334737b88cbd20a0ac237dfca37e (bonzini/rhel-qemu-kvm) + +Some subsystems like VFIO might disable ram block discard, but guest_memfd +uses discard operations to implement conversions between private and +shared memory. Because of this, sequences like the following can result +in stale IOMMU mappings: + +1. allocate shared page +2. convert page shared->private +3. discard shared page +4. convert page private->shared +5. allocate shared page +6. issue DMA operations against that shared page + +This is not a use-after-free, because after step 3 VFIO is still pinning +the page. However, DMA operations in step 6 will hit the old mapping +that was allocated in step 1. + +Address this by taking ram_block_discard_is_enabled() into account when +deciding whether or not to discard pages. + +Since kvm_convert_memory()/guest_memfd doesn't implement a +RamDiscardManager handler to convey and replay discard operations, +this is a case of uncoordinated discard, which is blocked/released +by ram_block_discard_require(). Interestingly, this function had +no use so far. 
+ +Alternative approaches would be to block discard of shared pages, but +this would cause guests to consume twice the memory if they use VFIO; +or to implement a RamDiscardManager and only block uncoordinated +discard, i.e. use ram_block_coordinated_discard_require(). + +[Commit message mostly by Michael Roth ] + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 852f0048f3ea9f14de18eb279a99fccb6d250e8f) +Signed-off-by: Paolo Bonzini +--- + system/physmem.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/system/physmem.c b/system/physmem.c +index f5dfa20e57..5ebcf5be11 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1846,6 +1846,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + assert(kvm_enabled()); + assert(new_block->guest_memfd < 0); + ++ if (ram_block_discard_require(true) < 0) { ++ error_setg_errno(errp, errno, ++ "cannot set up private guest memory: discard currently blocked"); ++ error_append_hint(errp, "Are you using assigned devices?\n"); ++ goto out_free; ++ } ++ + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, + 0, errp); + if (new_block->guest_memfd < 0) { +@@ -2109,6 +2116,7 @@ static void reclaim_ramblock(RAMBlock *block) + + if (block->guest_memfd >= 0) { + close(block->guest_memfd); ++ ram_block_discard_require(false); + } + + g_free(block); +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch b/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch new file mode 100644 index 0000000..32e792e --- /dev/null +++ b/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch @@ -0,0 +1,67 @@ +From d4e6f7105b00ba2536d5d733b7c03116f28ce116 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 6 May 2024 15:06:21 -0400 +Subject: [PATCH 2/5] Revert "monitor: use aio_co_reschedule_self()" + +RH-Author: Kevin Wolf +RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()" +RH-Jira: RHEL-34618 RHEL-38697 +RH-Acked-by: Stefan 
Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] b6a2ebd4a69dbcd2bd56c61e7c747f8f8f42337e (kmwolf/centos-qemu-kvm) + +Commit 1f25c172f837 ("monitor: use aio_co_reschedule_self()") was a code +cleanup that uses aio_co_reschedule_self() instead of open coding +coroutine rescheduling. + +Bug RHEL-34618 was reported and Kevin Wolf identified +the root cause. I missed that aio_co_reschedule_self() -> +qemu_get_current_aio_context() only knows about +qemu_aio_context/IOThread AioContexts and not about iohandler_ctx. It +does not function correctly when going back from the iohandler_ctx to +qemu_aio_context. + +Go back to open coding the AioContext transitions to avoid this bug. + +This reverts commit 1f25c172f83704e350c0829438d832384084a74d. + +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-34618 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240506190622.56095-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 719c6819ed9a9838520fa732f9861918dc693bda) +Signed-off-by: Kevin Wolf +--- + qapi/qmp-dispatch.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index f3488afeef..176b549473 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -212,7 +212,8 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + * executing the command handler so that it can make progress if it + * involves an AIO_WAIT_WHILE(). + */ +- aio_co_reschedule_self(qemu_get_aio_context()); ++ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); ++ qemu_coroutine_yield(); + } + + monitor_set_cur(qemu_coroutine_self(), cur_mon); +@@ -226,7 +227,9 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + * Move back to iohandler_ctx so that nested event loops for + * qemu_aio_context don't start new monitor commands. 
+ */ +- aio_co_reschedule_self(iohandler_get_aio_context()); ++ aio_co_schedule(iohandler_get_aio_context(), ++ qemu_coroutine_self()); ++ qemu_coroutine_yield(); + } + } else { + /* +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch b/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch new file mode 100644 index 0000000..96756df --- /dev/null +++ b/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch @@ -0,0 +1,38 @@ +From bcbc897cb19b3a6523de611f48f6bac6cea16c97 Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Thu, 2 May 2024 13:17:03 +0200 +Subject: [PATCH 2/2] Revert "x86: rhel 9.4.0 machine type compat fix" + +RH-Author: Sebastian Ott +RH-MergeRequest: 237: Revert "x86: rhel 9.4.0 machine type compat fix" +RH-Jira: RHEL-30362 +RH-Acked-by: Ani Sinha +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 858ec153e65e96c39ca4db17ed93fd58c77dc2eb (seott1/cos-qemu-kvm) + +This reverts commit c46e44f0f4e861fe412ce679b0b0204881c1c2f5. + +pc-q35-rhel9.4.0 and newer should stay with SMBIOS_ENTRY_POINT_TYPE_AUTO. 
+ +Signed-off-by: Sebastian Ott +--- + hw/i386/pc_q35.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2f11f9af7d..2b54944c0f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,9 +734,6 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + +- /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */ +- pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; +- + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch b/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch deleted file mode 100644 index b937d27..0000000 --- a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 18 Apr 2023 11:04:49 +0200 -Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests - -RH-Author: Igor Mammedov -RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests -RH-Bugzilla: 2087047 -RH-Acked-by: Ani Sinha -RH-Acked-by: Julia Suvorova -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam) - -with Q35 using ACPI PCI hotplug by default, user's request to unplug -device is ignored when it's issued before guest OS has been booted. -And any additional attempt to request device hot-unplug afterwards -results in following error: - - "Device XYZ is already in the process of unplug" - -arguably it can be considered as a regression introduced by [2], -before which it was possible to issue unplug request multiple -times. - -Accept new uplug requests after timeout (1ms). 
This brings ACPI PCI -hotplug on par with native PCIe unplug behavior [1] and allows user -to repeat unplug requests at propper times. -Set expire timeout to arbitrary 1msec so user won't be able to -flood guest with SCI interrupts by calling device_del in tight loop. - -PS: -ACPI spec doesn't mandate what OSPM can do with GPEx.status -bits set before it's booted => it's impl. depended. -Status bits may be retained (I tested with one Windows version) -or cleared (Linux since 2.6 kernel times) during guest's ACPI -subsystem initialization. -Clearing status bits (though not wrong per se) hides the unplug -event from guest, and it's upto user to repeat device_del later -when guest is able to handle unplug requests. - -1) 18416c62e3 ("pcie: expire pending delete") -2) -Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del") -Signed-off-by: Igor Mammedov -Acked-by: Gerd Hoffmann -CC: mst@redhat.com -CC: anisinha@redhat.com -CC: jusual@redhat.com -CC: kraxel@redhat.com -Message-Id: <20230418090449.2155757-1-imammedo@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Ani Sinha -(cherry picked from commit 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d) -Signed-off-by: Igor Mammedov ---- - hw/acpi/pcihp.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c -index dcfb779a7a..cdd6f775a1 100644 ---- a/hw/acpi/pcihp.c -+++ b/hw/acpi/pcihp.c -@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, - * acpi_pcihp_eject_slot() when the operation is completed. - */ - pdev->qdev.pending_deleted_event = true; -+ /* if unplug was requested before OSPM is initialized, -+ * linux kernel will clear GPE0.sts[] bits during boot, which effectively -+ * hides unplug event. And than followup qmp_device_del() calls remain -+ * blocked by above flag permanently. 
-+ * Unblock qmp_device_del() by setting expire limit, so user can -+ * repeat unplug request later when OSPM has been booted. -+ */ -+ pdev->qdev.pending_deleted_expires_ms = -+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */ -+ - s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); - acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); - } --- -2.39.1 - diff --git a/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch b/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch new file mode 100644 index 0000000..a0d9d31 --- /dev/null +++ b/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch @@ -0,0 +1,64 @@ +From 0e3934e89ad1dda21681f64ff38da69b07d1b531 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 6 May 2024 15:06:22 -0400 +Subject: [PATCH 3/5] aio: warn about iohandler_ctx special casing + +RH-Author: Kevin Wolf +RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()" +RH-Jira: RHEL-34618 RHEL-38697 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] cc316d70b2c187ee0412d6560ca1a03e381a69c1 (kmwolf/centos-qemu-kvm) + +The main loop has two AioContexts: qemu_aio_context and iohandler_ctx. +The main loop runs them both, but nested aio_poll() calls on +qemu_aio_context exclude iohandler_ctx. + +Which one should qemu_get_current_aio_context() return when called from +the main loop? Document that it's always qemu_aio_context. + +This has subtle effects on functions that use +qemu_get_current_aio_context(). For example, aio_co_reschedule_self() +does not work when moving from iohandler_ctx to qemu_aio_context because +qemu_get_current_aio_context() does not differentiate these two +AioContexts. + +Document this in order to reduce the chance of future bugs. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240506190622.56095-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit e669e800fc9ef8806af5c5578249ab758a4f8a5a) +Signed-off-by: Kevin Wolf +--- + include/block/aio.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/include/block/aio.h b/include/block/aio.h +index 8378553eb9..4ee81936ed 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -629,6 +629,9 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co); + * + * Move the currently running coroutine to new_ctx. If the coroutine is already + * running in new_ctx, do nothing. ++ * ++ * Note that this function cannot reschedule from iohandler_ctx to ++ * qemu_aio_context. + */ + void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx); + +@@ -661,6 +664,9 @@ void aio_co_enter(AioContext *ctx, Coroutine *co); + * If called from an IOThread this will be the IOThread's AioContext. If + * called from the main thread or with the "big QEMU lock" taken it + * will be the main loop AioContext. ++ * ++ * Note that the return value is never the main loop's iohandler_ctx and the ++ * return value is the main loop AioContext instead. 
+ */ + AioContext *qemu_get_current_aio_context(void); + +-- +2.39.3 + diff --git a/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch b/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch deleted file mode 100644 index 69505f8..0000000 --- a/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 50795ee051a342c681a9b45671c552fbd6274db8 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:13 2023 -0400 - - apic: disable reentrancy detection for apic-msi - - As the code is designed for re-entrant calls to apic-msi, mark apic-msi - as reentrancy-safe. - - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-9-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/intc/apic.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/intc/apic.c b/hw/intc/apic.c -index 20b5a94073..ac3d47d231 100644 ---- a/hw/intc/apic.c -+++ b/hw/intc/apic.c -@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) - memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", - APIC_SPACE_SIZE); - -+ /* -+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can -+ * write back to apic-msi. As such mark the apic-msi region re-entrancy -+ * safe. 
-+ */ -+ s->io_memory.disable_reentrancy_guard = true; -+ - s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); - local_apics[s->id] = s; - --- -2.39.3 - diff --git a/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch b/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch deleted file mode 100644 index 65ba3be..0000000 --- a/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch +++ /dev/null @@ -1,231 +0,0 @@ -From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 9c86c97f12c060bf7484dd931f38634e166a81f0 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:07 2023 -0400 - - async: Add an optional reentrancy guard to the BH API - - Devices can pass their MemoryReentrancyGuard (from their DeviceState), - when creating new BHes. Then, the async API will toggle the guard - before/after calling the BH call-back. This prevents bh->mmio reentrancy - issues. 
- - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-3-alxndr@bu.edu> - [thuth: Fix "line over 90 characters" checkpatch.pl error] - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - docs/devel/multiple-iothreads.txt | 7 +++++++ - include/block/aio.h | 18 ++++++++++++++++-- - include/qemu/main-loop.h | 7 +++++-- - tests/unit/ptimer-test-stubs.c | 3 ++- - util/async.c | 18 +++++++++++++++++- - util/main-loop.c | 6 ++++-- - util/trace-events | 1 + - 7 files changed, 52 insertions(+), 8 deletions(-) - -diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt -index 343120f2ef..a3e949f6b3 100644 ---- a/docs/devel/multiple-iothreads.txt -+++ b/docs/devel/multiple-iothreads.txt -@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: - * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier - * LEGACY timer_new_ms() - create a timer - * LEGACY qemu_bh_new() - create a BH -+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard - * LEGACY qemu_aio_wait() - run an event loop iteration - - Since they implicitly work on the main loop they cannot be used in code that -@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): - * aio_set_event_notifier() - monitor an event notifier - * aio_timer_new() - create a timer - * aio_bh_new() - create a BH -+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard - * aio_poll() - run an event loop iteration - -+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" -+argument, which is used to check for and prevent re-entrancy problems. For -+BHs associated with devices, the reentrancy-guard is contained in the -+corresponding DeviceState and named "mem_reentrancy_guard". -+ - The AioContext can be obtained from the IOThread using - iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). 
- Code that takes an AioContext argument works both in IOThreads or the main -diff --git a/include/block/aio.h b/include/block/aio.h -index 543717f294..db6f23c619 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -23,6 +23,8 @@ - #include "qemu/thread.h" - #include "qemu/timer.h" - #include "block/graph-lock.h" -+#include "hw/qdev-core.h" -+ - - typedef struct BlockAIOCB BlockAIOCB; - typedef void BlockCompletionFunc(void *opaque, int ret); -@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - * is opaque and must be allocated prior to its use. - * - * @name: A human-readable identifier for debugging purposes. -+ * @reentrancy_guard: A guard set when entering a cb to prevent -+ * device-reentrancy issues - */ - QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, -- const char *name); -+ const char *name, MemReentrancyGuard *reentrancy_guard); - - /** - * aio_bh_new: Allocate a new bottom half structure -@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - * string. - */ - #define aio_bh_new(ctx, cb, opaque) \ -- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) -+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) -+ -+/** -+ * aio_bh_new_guarded: Allocate a new bottom half structure with a -+ * reentrancy_guard -+ * -+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name -+ * string. -+ */ -+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ -+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) - - /** - * aio_notify: Force processing of pending events. 
-diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h -index b3e54e00bc..68e70e61aa 100644 ---- a/include/qemu/main-loop.h -+++ b/include/qemu/main-loop.h -@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); - - /* internal interfaces */ - -+#define qemu_bh_new_guarded(cb, opaque, guard) \ -+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) - #define qemu_bh_new(cb, opaque) \ -- qemu_bh_new_full((cb), (opaque), (stringify(cb))) --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); -+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard); - void qemu_bh_schedule_idle(QEMUBH *bh); - - enum { -diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c -index f2bfcede93..8c9407c560 100644 ---- a/tests/unit/ptimer-test-stubs.c -+++ b/tests/unit/ptimer-test-stubs.c -@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) - return deadline; - } - --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard) - { - QEMUBH *bh = g_new(QEMUBH, 1); - -diff --git a/util/async.c b/util/async.c -index 21016a1ac7..a9b528c370 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -65,6 +65,7 @@ struct QEMUBH { - void *opaque; - QSLIST_ENTRY(QEMUBH) next; - unsigned flags; -+ MemReentrancyGuard *reentrancy_guard; - }; - - /* Called concurrently from any thread */ -@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, - } - - QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, -- const char *name) -+ const char *name, MemReentrancyGuard *reentrancy_guard) - { - QEMUBH *bh; - bh = g_new(QEMUBH, 1); -@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc 
*cb, void *opaque, - .cb = cb, - .opaque = opaque, - .name = name, -+ .reentrancy_guard = reentrancy_guard, - }; - return bh; - } - - void aio_bh_call(QEMUBH *bh) - { -+ bool last_engaged_in_io = false; -+ -+ if (bh->reentrancy_guard) { -+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; -+ if (bh->reentrancy_guard->engaged_in_io) { -+ trace_reentrant_aio(bh->ctx, bh->name); -+ } -+ bh->reentrancy_guard->engaged_in_io = true; -+ } -+ - bh->cb(bh->opaque); -+ -+ if (bh->reentrancy_guard) { -+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; -+ } - } - - /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ -diff --git a/util/main-loop.c b/util/main-loop.c -index e180c85145..7022f02ef8 100644 ---- a/util/main-loop.c -+++ b/util/main-loop.c -@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) - - /* Functions to operate on the main QEMU AioContext. */ - --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard) - { -- return aio_bh_new_full(qemu_aio_context, cb, opaque, name); -+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name, -+ reentrancy_guard); - } - - /* -diff --git a/util/trace-events b/util/trace-events -index 16f78d8fe5..3f7e766683 100644 ---- a/util/trace-events -+++ b/util/trace-events -@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" - # async.c - aio_co_schedule(void *ctx, void *co) "ctx %p co %p" - aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" -+reentrant_aio(void *ctx, const char *name) "ctx %p name %s" - - # thread-pool.c - thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" --- -2.39.3 - diff --git a/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch b/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch deleted file mode 100644 index df71fa2..0000000 --- 
a/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 7915bd06f25e1803778081161bf6fa10c42dc7cd -Author: Alexander Bulekov -Date: Mon May 1 10:19:56 2023 -0400 - - async: avoid use-after-free on re-entrancy guard - - A BH callback can free the BH, causing a use-after-free in aio_bh_call. - Fix that by keeping a local copy of the re-entrancy guard pointer. - - Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513 - Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API") - Signed-off-by: Alexander Bulekov - Message-Id: <20230501141956.3444868-1-alxndr@bu.edu> - Reviewed-by: Thomas Huth - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - util/async.c | 14 ++++++++------ - 1 file changed, 8 insertions(+), 6 deletions(-) - -diff --git a/util/async.c b/util/async.c -index a9b528c370..cd1a1815f9 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh) - { - bool last_engaged_in_io = false; - -- if (bh->reentrancy_guard) { -- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; -- if (bh->reentrancy_guard->engaged_in_io) { -+ /* Make a copy of the guard-pointer as cb may free the bh */ -+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard; -+ if (reentrancy_guard) { -+ last_engaged_in_io = reentrancy_guard->engaged_in_io; -+ if (reentrancy_guard->engaged_in_io) { - trace_reentrant_aio(bh->ctx, bh->name); - } -- 
bh->reentrancy_guard->engaged_in_io = true; -+ reentrancy_guard->engaged_in_io = true; - } - - bh->cb(bh->opaque); - -- if (bh->reentrancy_guard) { -- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; -+ if (reentrancy_guard) { -+ reentrancy_guard->engaged_in_io = last_engaged_in_io; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch b/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch deleted file mode 100644 index 6d9abb8..0000000 --- a/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for - iomem - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:11 2023 -0400 - - bcm2835_property: disable reentrancy detection for iomem - - As the code is designed for re-entrant calls from bcm2835_property to - bcm2835_mbox and back into bcm2835_property, mark iomem as - reentrancy-safe. 
- - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-7-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/misc/bcm2835_property.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c -index 890ae7bae5..de056ea2df 100644 ---- a/hw/misc/bcm2835_property.c -+++ b/hw/misc/bcm2835_property.c -@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) - - memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, - TYPE_BCM2835_PROPERTY, 0x10); -+ -+ /* -+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from -+ * iomem. As such, mark iomem as re-entracy safe. -+ */ -+ s->iomem.disable_reentrancy_guard = true; -+ - sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); - sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch b/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch deleted file mode 100644 index 6de5d65..0000000 --- a/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch +++ /dev/null @@ -1,354 +0,0 @@ -From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:16 +0200 -Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s) - -When processing vectored guest requests that are not aligned to the -storage request alignment, we pad them by adding head and/or tail -buffers for a read-modify-write cycle. - -The guest can submit I/O vectors up to IOV_MAX (1024) in length, but -with this padding, the vector can exceed that limit. 
As of -4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make -qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the -limit, instead returning an error to the guest. - -To the guest, this appears as a random I/O error. We should not return -an I/O error to the guest when it issued a perfectly valid request. - -Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector -longer than IOV_MAX, which generally seems to work (because the guest -assumes a smaller alignment than we really have, file-posix's -raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and -so emulate the request, so that the IOV_MAX does not matter). However, -that does not seem exactly great. - -I see two ways to fix this problem: -1. We split such long requests into two requests. -2. We join some elements of the vector into new buffers to make it - shorter. - -I am wary of (1), because it seems like it may have unintended side -effects. - -(2) on the other hand seems relatively simple to implement, with -hopefully few side effects, so this patch does that. - -To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request() -is effectively replaced by the new function bdrv_create_padded_qiov(), -which not only wraps the request IOV with padding head/tail, but also -ensures that the resulting vector will not have more than IOV_MAX -elements. Putting that functionality into qemu_iovec_init_extended() is -infeasible because it requires allocating a bounce buffer; doing so -would require many more parameters (buffer alignment, how to initialize -the buffer, and out parameters like the buffer, its length, and the -original elements), which is not reasonable. - -Conversely, it is not difficult to move qemu_iovec_init_extended()'s -functionality into bdrv_create_padded_qiov() by using public -qemu_iovec_* functions, so that is what this patch does. 
- -Because bdrv_pad_request() was the only "serious" user of -qemu_iovec_init_extended(), the next patch will remove the latter -function, so the functionality is not implemented twice. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964 -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-3-hreitz@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a) -Signed-off-by: Hanna Czenczek ---- - block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 151 insertions(+), 15 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 2e267a85ab..4e8e90208b 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1439,6 +1439,14 @@ out: - * @merge_reads is true for small requests, - * if @buf_len == @head + bytes + @tail. In this case it is possible that both - * head and tail exist but @buf_len == align and @tail_buf == @buf. -+ * -+ * @write is true for write requests, false for read requests. -+ * -+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to -+ * merge existing vector elements into a single one. @collapse_bounce_buf acts -+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse -+ * I/O vector elements so for read requests, the data can be copied back after -+ * the read is done. 
- */ - typedef struct BdrvRequestPadding { - uint8_t *buf; -@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding { - size_t head; - size_t tail; - bool merge_reads; -+ bool write; - QEMUIOVector local_qiov; -+ -+ uint8_t *collapse_bounce_buf; -+ size_t collapse_len; -+ QEMUIOVector pre_collapse_qiov; - } BdrvRequestPadding; - - static bool bdrv_init_padding(BlockDriverState *bs, - int64_t offset, int64_t bytes, -+ bool write, - BdrvRequestPadding *pad) - { - int64_t align = bs->bl.request_alignment; -@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs, - pad->tail_buf = pad->buf + pad->buf_len - align; - } - -+ pad->write = write; -+ - return true; - } - -@@ -1547,8 +1563,23 @@ zero_mem: - return 0; - } - --static void bdrv_padding_destroy(BdrvRequestPadding *pad) -+/** -+ * Free *pad's associated buffers, and perform any necessary finalization steps. -+ */ -+static void bdrv_padding_finalize(BdrvRequestPadding *pad) - { -+ if (pad->collapse_bounce_buf) { -+ if (!pad->write) { -+ /* -+ * If padding required elements in the vector to be collapsed into a -+ * bounce buffer, copy the bounce buffer content back -+ */ -+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ qemu_vfree(pad->collapse_bounce_buf); -+ qemu_iovec_destroy(&pad->pre_collapse_qiov); -+ } - if (pad->buf) { - qemu_vfree(pad->buf); - qemu_iovec_destroy(&pad->local_qiov); -@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - memset(pad, 0, sizeof(*pad)); - } - -+/* -+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while -+ * ensuring that the resulting vector will not exceed IOV_MAX elements. -+ * -+ * To ensure this, when necessary, the first two or three elements of @iov are -+ * merged into pad->collapse_bounce_buf and replaced by a reference to that -+ * bounce buffer in pad->local_qiov. 
-+ * -+ * After performing a read request, the data from the bounce buffer must be -+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()). -+ */ -+static int bdrv_create_padded_qiov(BlockDriverState *bs, -+ BdrvRequestPadding *pad, -+ struct iovec *iov, int niov, -+ size_t iov_offset, size_t bytes) -+{ -+ int padded_niov, surplus_count, collapse_count; -+ -+ /* Assert this invariant */ -+ assert(niov <= IOV_MAX); -+ -+ /* -+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error -+ * to the guest is not ideal, but there is little else we can do. At least -+ * this will practically never happen on 64-bit systems. -+ */ -+ if (SIZE_MAX - pad->head < bytes || -+ SIZE_MAX - pad->head - bytes < pad->tail) -+ { -+ return -EINVAL; -+ } -+ -+ /* Length of the resulting IOV if we just concatenated everything */ -+ padded_niov = !!pad->head + niov + !!pad->tail; -+ -+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX)); -+ -+ if (pad->head) { -+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head); -+ } -+ -+ /* -+ * If padded_niov > IOV_MAX, we cannot just concatenate everything. -+ * Instead, merge the first two or three elements of @iov to reduce the -+ * number of vector elements as necessary. -+ */ -+ if (padded_niov > IOV_MAX) { -+ /* -+ * Only head and tail can have lead to the number of entries exceeding -+ * IOV_MAX, so we can exceed it by the head and tail at most. We need -+ * to reduce the number of elements by `surplus_count`, so we merge that -+ * many elements plus one into one element. -+ */ -+ surplus_count = padded_niov - IOV_MAX; -+ assert(surplus_count <= !!pad->head + !!pad->tail); -+ collapse_count = surplus_count + 1; -+ -+ /* -+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then -+ * advance `iov` (and associated variables) by those elements. 
-+ */ -+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count); -+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov, -+ collapse_count, iov_offset, SIZE_MAX); -+ iov += collapse_count; -+ iov_offset = 0; -+ niov -= collapse_count; -+ bytes -= pad->pre_collapse_qiov.size; -+ -+ /* -+ * Construct the bounce buffer to match the length of the to-collapse -+ * vector elements, and for write requests, initialize it with the data -+ * from those elements. Then add it to `pad->local_qiov`. -+ */ -+ pad->collapse_len = pad->pre_collapse_qiov.size; -+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len); -+ if (pad->write) { -+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ qemu_iovec_add(&pad->local_qiov, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ -+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes); -+ -+ if (pad->tail) { -+ qemu_iovec_add(&pad->local_qiov, -+ pad->buf + pad->buf_len - pad->tail, pad->tail); -+ } -+ -+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX)); -+ return 0; -+} -+ - /* - * bdrv_pad_request - * -@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - * read of padding, bdrv_padding_rmw_read() should be called separately if - * needed. - * -+ * @write is true for write requests, false for read requests. 
-+ * - * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out: - * - on function start they represent original request - * - on failure or when padding is not needed they are unchanged -@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - static int bdrv_pad_request(BlockDriverState *bs, - QEMUIOVector **qiov, size_t *qiov_offset, - int64_t *offset, int64_t *bytes, -+ bool write, - BdrvRequestPadding *pad, bool *padded, - BdrvRequestFlags *flags) - { - int ret; -+ struct iovec *sliced_iov; -+ int sliced_niov; -+ size_t sliced_head, sliced_tail; - - bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); - -- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { -+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { - if (padded) { - *padded = false; - } - return 0; - } - -- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, -- *qiov, *qiov_offset, *bytes, -- pad->buf + pad->buf_len - pad->tail, -- pad->tail); -+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, -+ &sliced_head, &sliced_tail, -+ &sliced_niov); -+ -+ /* Guaranteed by bdrv_check_qiov_request() */ -+ assert(*bytes <= SIZE_MAX); -+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, -+ sliced_head, *bytes); - if (ret < 0) { -- bdrv_padding_destroy(pad); -+ bdrv_padding_finalize(pad); - return ret; - } - *bytes += pad->head + pad->tail; -@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - flags |= BDRV_REQ_COPY_ON_READ; - } - -- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, -- NULL, &flags); -+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false, -+ &pad, NULL, &flags); - if (ret < 0) { - goto fail; - } -@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - bs->bl.request_alignment, - qiov, qiov_offset, flags); - tracked_request_end(&req); -- bdrv_padding_destroy(&pad); -+ 
bdrv_padding_finalize(&pad); - - fail: - bdrv_dec_in_flight(bs); -@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, - /* This flag doesn't make sense for padding or zero writes */ - flags &= ~BDRV_REQ_REGISTERED_BUF; - -- padding = bdrv_init_padding(bs, offset, bytes, &pad); -+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad); - if (padding) { - assert(!(flags & BDRV_REQ_NO_WAIT)); - bdrv_make_request_serialising(req, align); -@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, - } - - out: -- bdrv_padding_destroy(&pad); -+ bdrv_padding_finalize(&pad); - - return ret; - } -@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, - * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do - * alignment only if there is no ZERO flag. - */ -- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, -- &padded, &flags); -+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true, -+ &pad, &padded, &flags); - if (ret < 0) { - return ret; - } -@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, - ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, - qiov, qiov_offset, flags); - -- bdrv_padding_destroy(&pad); -+ bdrv_padding_finalize(&pad); - - out: - tracked_request_end(&req); --- -2.39.3 - diff --git a/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch b/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch deleted file mode 100644 index fbab82d..0000000 --- a/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch +++ /dev/null @@ -1,56 +0,0 @@ -From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 4 May 2023 13:57:34 +0200 -Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in - qmp_block_resize() - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in 
qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm) - -This QMP handler runs in a coroutine, so it must use the corresponding -no_co_wrappers instead. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688 -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-Id: <20230504115750.54437-5-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888) -Signed-off-by: Kevin Wolf ---- - blockdev.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index d7b5c18f0a..eb509cf964 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, - return; - } - -- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); -+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); - if (!blk) { - return; - } -@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, - - bdrv_co_lock(bs); - bdrv_drained_end(bs); -- blk_unref(blk); -+ blk_co_unref(blk); - bdrv_co_unlock(bs); - } - --- -2.39.1 - diff --git a/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch b/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch deleted file mode 100644 index c0ab8c2..0000000 --- a/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 14 Jul 2023 10:59:38 +0200 -Subject: [PATCH 5/9] block: Fix pad_request's request restriction - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX 
-RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s) - -bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX, -which bdrv_check_qiov_request() does not guarantee. - -bdrv_check_request32() however will guarantee this, and both of -bdrv_pad_request()'s callers (bdrv_co_preadv_part() and -bdrv_co_pwritev_part()) already run it before calling -bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call -bdrv_check_request32() without expecting error, too. - -In effect, this patch will not change guest-visible behavior. It is a -clean-up to tighten a condition to match what is guaranteed by our -callers, and which exists purely to show clearly why the subsequent -assertion (`assert(*bytes <= SIZE_MAX)`) is always true. - -Note there is a difference between the interfaces of -bdrv_check_qiov_request() and bdrv_check_request32(): The former takes -an errp, the latter does not, so we can no longer just pass -&error_abort. Instead, we need to check the returned value. While we -do expect success (because the callers have already run this function), -an assert(ret == 0) is not much simpler than just to return an error if -it occurs, so let us handle errors by returning them up the stack now. 
- -Reported-by: Peter Maydell -Signed-off-by: Hanna Czenczek -Message-id: 20230714085938.202730-1-hreitz@redhat.com -Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a - ("block: Collapse padded I/O vecs exceeding IOV_MAX") -Signed-off-by: Hanna Czenczek -Signed-off-by: Stefan Hajnoczi ---- - block/io.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 4e8e90208b..807c9fb720 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs, - int sliced_niov; - size_t sliced_head, sliced_tail; - -- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); -+ /* Should have been checked by the caller already */ -+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset); -+ if (ret < 0) { -+ return ret; -+ } - - if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { - if (padded) { -@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs, - &sliced_head, &sliced_tail, - &sliced_niov); - -- /* Guaranteed by bdrv_check_qiov_request() */ -+ /* Guaranteed by bdrv_check_request32() */ - assert(*bytes <= SIZE_MAX); - ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, - sliced_head, *bytes); --- -2.39.3 - diff --git a/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch b/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch new file mode 100644 index 0000000..8db6199 --- /dev/null +++ b/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch @@ -0,0 +1,252 @@ +From 2ee645a339e9ef9cd92620a8b784d18d512326be Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:56:02 +0200 +Subject: [PATCH 4/4] block: Parse filenames only when explicitly requested + +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake 
+RH-Commit: [4/4] f44c2941d4419e60f16dea3e9adca164e75aa78d + +When handling image filenames from legacy options such as -drive or from +tools, these filenames are parsed for protocol prefixes, including for +the json:{} pseudo-protocol. + +This behaviour is intended for filenames that come directly from the +command line and for backing files, which may come from the image file +itself. Higher level management tools generally take care to verify that +untrusted images don't contain a bad (or any) backing file reference; +'qemu-img info' is a suitable tool for this. + +However, for other files that can be referenced in images, such as +qcow2 data files or VMDK extents, the string from the image file is +usually not verified by management tools - and 'qemu-img info' wouldn't +be suitable because in contrast to backing files, it already opens these +other referenced files. So here the string should be interpreted as a +literal local filename. More complex configurations need to be specified +explicitly on the command line or in QMP. + +This patch changes bdrv_open_inherit() so that it only parses filenames +if a new parameter parse_filename is true. It is set for the top level +in bdrv_open(), for the file child and for the backing file child. All +other callers pass false and disable filename parsing this way. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek +--- + block.c | 90 ++++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 57 insertions(+), 33 deletions(-) + +diff --git a/block.c b/block.c +index 468cf5e67d..50bdd197b7 100644 +--- a/block.c ++++ b/block.c +@@ -86,6 +86,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, ++ bool parse_filename, + Error **errp); + + static bool bdrv_recurse_has_child(BlockDriverState *bs, +@@ -2058,7 +2059,8 @@ static void parse_json_protocol(QDict *options, const char **pfilename, + * block driver has been specified explicitly. + */ + static int bdrv_fill_options(QDict **options, const char *filename, +- int *flags, Error **errp) ++ int *flags, bool allow_parse_filename, ++ Error **errp) + { + const char *drvname; + bool protocol = *flags & BDRV_O_PROTOCOL; +@@ -2100,7 +2102,7 @@ static int bdrv_fill_options(QDict **options, const char *filename, + if (protocol && filename) { + if (!qdict_haskey(*options, "filename")) { + qdict_put_str(*options, "filename", filename); +- parse_filename = true; ++ parse_filename = allow_parse_filename; + } else { + error_setg(errp, "Can't specify 'file' and 'filename' options at " + "the same time"); +@@ -3663,7 +3665,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + } + + backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, +- &child_of_bds, bdrv_backing_role(bs), errp); ++ &child_of_bds, bdrv_backing_role(bs), true, ++ errp); + if (!backing_hd) { + bs->open_flags |= BDRV_O_NO_BACKING; + error_prepend(errp, "Could not open backing file: "); +@@ -3697,7 +3700,8 @@ free_exit: + static BlockDriverState * + bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + 
BlockDriverState *parent, const BdrvChildClass *child_class, +- BdrvChildRole child_role, bool allow_none, Error **errp) ++ BdrvChildRole child_role, bool allow_none, ++ bool parse_filename, Error **errp) + { + BlockDriverState *bs = NULL; + QDict *image_options; +@@ -3728,7 +3732,8 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + } + + bs = bdrv_open_inherit(filename, reference, image_options, 0, +- parent, child_class, child_role, errp); ++ parent, child_class, child_role, parse_filename, ++ errp); + if (!bs) { + goto done; + } +@@ -3738,6 +3743,33 @@ done: + return bs; + } + ++static BdrvChild *bdrv_open_child_common(const char *filename, ++ QDict *options, const char *bdref_key, ++ BlockDriverState *parent, ++ const BdrvChildClass *child_class, ++ BdrvChildRole child_role, ++ bool allow_none, bool parse_filename, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvChild *child; ++ ++ GLOBAL_STATE_CODE(); ++ ++ bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, ++ child_role, allow_none, parse_filename, errp); ++ if (bs == NULL) { ++ return NULL; ++ } ++ ++ bdrv_graph_wrlock(); ++ child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, ++ errp); ++ bdrv_graph_wrunlock(); ++ ++ return child; ++} ++ + /* + * Opens a disk image whose options are given as BlockdevRef in another block + * device's options. 
+@@ -3761,27 +3793,15 @@ BdrvChild *bdrv_open_child(const char *filename, + BdrvChildRole child_role, + bool allow_none, Error **errp) + { +- BlockDriverState *bs; +- BdrvChild *child; +- +- GLOBAL_STATE_CODE(); +- +- bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, +- child_role, allow_none, errp); +- if (bs == NULL) { +- return NULL; +- } +- +- bdrv_graph_wrlock(); +- child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, +- errp); +- bdrv_graph_wrunlock(); +- +- return child; ++ return bdrv_open_child_common(filename, options, bdref_key, parent, ++ child_class, child_role, allow_none, false, ++ errp); + } + + /* +- * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. ++ * This does mostly the same as bdrv_open_child(), but for opening the primary ++ * child of a node. A notable difference from bdrv_open_child() is that it ++ * enables filename parsing for protocol names (including json:). + * + * @parent can move to a different AioContext in this function. + */ +@@ -3796,8 +3816,8 @@ int bdrv_open_file_child(const char *filename, + role = parent->drv->is_filter ? 
+ (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; + +- if (!bdrv_open_child(filename, options, bdref_key, parent, +- &child_of_bds, role, false, errp)) ++ if (!bdrv_open_child_common(filename, options, bdref_key, parent, ++ &child_of_bds, role, false, true, errp)) + { + return -EINVAL; + } +@@ -3842,7 +3862,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) + + } + +- bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); ++ bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, false, ++ errp); + obj = NULL; + qobject_unref(obj); + visit_free(v); +@@ -3932,7 +3953,7 @@ static BlockDriverState * no_coroutine_fn + bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + int flags, BlockDriverState *parent, + const BdrvChildClass *child_class, BdrvChildRole child_role, +- Error **errp) ++ bool parse_filename, Error **errp) + { + int ret; + BlockBackend *file = NULL; +@@ -3980,9 +4001,11 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + } + + /* json: syntax counts as explicit options, as if in the QDict */ +- parse_json_protocol(options, &filename, &local_err); +- if (local_err) { +- goto fail; ++ if (parse_filename) { ++ parse_json_protocol(options, &filename, &local_err); ++ if (local_err) { ++ goto fail; ++ } + } + + bs->explicit_options = qdict_clone_shallow(options); +@@ -4007,7 +4030,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + parent->open_flags, parent->options); + } + +- ret = bdrv_fill_options(&options, filename, &flags, &local_err); ++ ret = bdrv_fill_options(&options, filename, &flags, parse_filename, ++ &local_err); + if (ret < 0) { + goto fail; + } +@@ -4076,7 +4100,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + + file_bs = bdrv_open_child_bs(filename, options, "file", bs, + &child_of_bds, BDRV_CHILD_IMAGE, +- true, &local_err); ++ true, true, 
&local_err); + if (local_err) { + goto fail; + } +@@ -4225,7 +4249,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, + GLOBAL_STATE_CODE(); + + return bdrv_open_inherit(filename, reference, options, flags, NULL, +- NULL, 0, errp); ++ NULL, 0, true, errp); + } + + /* Return true if the NULL-terminated @list contains @str */ +-- +2.39.3 + diff --git a/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch b/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch deleted file mode 100644 index 0f0347b..0000000 --- a/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch +++ /dev/null @@ -1,386 +0,0 @@ -From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 4 May 2023 13:57:33 +0200 -Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine - context - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm) - -These functions must not be called in coroutine context, because they -need write access to the graph. 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-Id: <20230504115750.54437-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786) -Signed-off-by: Kevin Wolf ---- - block.c | 2 +- - block/crypto.c | 6 +++--- - block/parallels.c | 6 +++--- - block/qcow.c | 6 +++--- - block/qcow2.c | 14 +++++++------- - block/qed.c | 6 +++--- - block/vdi.c | 6 +++--- - block/vhdx.c | 6 +++--- - block/vmdk.c | 18 +++++++++--------- - block/vpc.c | 6 +++--- - include/block/block-global-state.h | 3 ++- - include/sysemu/block-backend-global-state.h | 5 ++++- - 12 files changed, 44 insertions(+), 40 deletions(-) - -diff --git a/block.c b/block.c -index d79a52ca74..a48112f945 100644 ---- a/block.c -+++ b/block.c -@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, - - ret = 0; - out: -- blk_unref(blk); -+ blk_co_unref(blk); - return ret; - } - -diff --git a/block/crypto.c b/block/crypto.c -index ca67289187..8fd3ad0054 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, - ret = 0; - cleanup: - qcrypto_block_free(crypto); -- blk_unref(blk); -+ blk_co_unref(blk); - return ret; - } - -@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) - - ret = 0; - fail: -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -730,7 +730,7 @@ fail: - bdrv_co_delete_file_noerr(bs); - } - -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_QCryptoBlockCreateOptions(create_opts); - qobject_unref(cryptoopts); - return ret; -diff --git a/block/parallels.c b/block/parallels.c -index 013684801a..b49c35929e 100644 ---- a/block/parallels.c -+++ b/block/parallels.c -@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, - - ret = 0; - out: -- blk_unref(blk); -- bdrv_unref(bs); 
-+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - - exit: -@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename, - - done: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qcow.c b/block/qcow.c -index 490e4f819e..a0c701f578 100644 ---- a/block/qcow.c -+++ b/block/qcow.c -@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, - g_free(tmp); - ret = 0; - exit: -- blk_unref(qcow_blk); -- bdrv_unref(bs); -+ blk_co_unref(qcow_blk); -+ bdrv_co_unref(bs); - qcrypto_block_free(crypto); - return ret; - } -@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename, - fail: - g_free(backing_fmt); - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qcow2.c b/block/qcow2.c -index 22084730f9..0b8beb8b47 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - goto out; - } - -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - - /* -@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - } - } - -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - - /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 
-@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - - ret = 0; - out: -- blk_unref(blk); -- bdrv_unref(bs); -- bdrv_unref(data_bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); -+ bdrv_co_unref(data_bs); - return ret; - } - -@@ -3949,8 +3949,8 @@ finish: - } - - qobject_unref(qdict); -- bdrv_unref(bs); -- bdrv_unref(data_bs); -+ bdrv_co_unref(bs); -+ bdrv_co_unref(data_bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qed.c b/block/qed.c -index 0705a7b4e2..aff2a2076e 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, - ret = 0; /* success */ - out: - g_free(l1_table); -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/vdi.c b/block/vdi.c -index f2434d6153..08331d2dd7 100644 ---- a/block/vdi.c -+++ b/block/vdi.c -@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, - - ret = 0; - exit: -- blk_unref(blk); -- bdrv_unref(bs_file); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs_file); - g_free(bmap); - return ret; - } -@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename, - done: - qobject_unref(qdict); - qapi_free_BlockdevCreateOptions(create_options); -- bdrv_unref(bs_file); -+ bdrv_co_unref(bs_file); - return ret; - } - -diff --git a/block/vhdx.c b/block/vhdx.c -index 81420722a1..00777da91a 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, - - ret = 0; - delete_and_exit: -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); 
- g_free(creator); - return ret; - } -@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/vmdk.c b/block/vmdk.c -index f5f49018fe..01ca13c82b 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2306,7 +2306,7 @@ exit: - if (pbb) { - *pbb = blk; - } else { -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - } - } -@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size, - if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { - error_setg(errp, "Invalid backing file format: %s. Must be vmdk", - blk_bs(backing)->drv->format_name); -- blk_unref(backing); -+ blk_co_unref(backing); - ret = -EINVAL; - goto exit; - } - ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); -- blk_unref(backing); -+ blk_co_unref(backing); - if (ret) { - error_setg(errp, "Failed to read parent CID"); - goto exit; -@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size, - blk_bs(extent_blk)->filename); - created_size += cur_size; - extent_idx++; -- blk_unref(extent_blk); -+ blk_co_unref(extent_blk); - } - - /* Check whether we got excess extents */ - extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, - opaque, NULL); - if (extent_blk) { -- blk_unref(extent_blk); -+ blk_co_unref(extent_blk); - error_setg(errp, "List of extents contains unused extents"); - ret = -EINVAL; - goto exit; -@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size, - ret = 0; - exit: - if (blk) { -- blk_unref(blk); -+ blk_co_unref(blk); - } - g_free(desc); - g_free(parent_desc_line); -@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, - errp)) { - goto exit; - } -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - exit: - g_free(ext_filename); - return blk; -@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, - return NULL; 
- } - blk_set_allow_write_beyond_eof(blk, true); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - - if (size != -1) { - ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); - if (ret) { -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - } - } -diff --git a/block/vpc.c b/block/vpc.c -index b89b0ff8e2..07ddda5b99 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, - } - - out: -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 399200a9a3..cd4ea554bf 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt, - bool quiet, Error **errp); - - void bdrv_ref(BlockDriverState *bs); --void bdrv_unref(BlockDriverState *bs); -+void no_coroutine_fn bdrv_unref(BlockDriverState *bs); -+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs); - void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); - BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, -diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h -index 2b6d27db7c..fa83f9389c 100644 ---- a/include/sysemu/block-backend-global-state.h -+++ b/include/sysemu/block-backend-global-state.h -@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options, - - int blk_get_refcnt(BlockBackend *blk); - void blk_ref(BlockBackend *blk); --void blk_unref(BlockBackend *blk); -+ -+void no_coroutine_fn blk_unref(BlockBackend 
*blk); -+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); -+ - void blk_remove_all_bs(void); - BlockBackend *blk_by_name(const char *name); - BlockBackend *blk_next(BlockBackend *blk); --- -2.39.1 - diff --git a/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch b/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch deleted file mode 100644 index caf6694..0000000 --- a/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch +++ /dev/null @@ -1,74 +0,0 @@ -From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 26 Jul 2023 09:48:07 +0200 -Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s) - -qemu_open() in blkio_virtio_blk_common_open() is used to open the -character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in -the future eventually the unix socket. - -In all these cases we cannot open the path in read-only mode, -when the `read-only` option of blockdev is on, because the exchange -of IOCTL commands for example will fail. - -In order to open the device read-only, we have to use the `read-only` -property of the libblkio driver as we already do in blkio_file_open(). - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439 -Reported-by: Qing Wang -Signed-off-by: Stefano Garzarella -Reviewed-by: Daniel P. 
Berrangé -Message-id: 20230726074807.14041-1-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 3ea9841bd8..5a82c6cb1a 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - * layer through the "/dev/fdset/N" special path. - */ - if (fd_supported) { -- int open_flags; -- -- if (flags & BDRV_O_RDWR) { -- open_flags = O_RDWR; -- } else { -- open_flags = O_RDONLY; -- } -- -- fd = qemu_open(path, open_flags, errp); -+ /* -+ * `path` can contain the path of a character device -+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket. -+ * -+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR -+ * is not set in the open flags, because the exchange of IOCTL commands -+ * for example will fail. -+ * -+ * In order to open the device read-only, we are using the `read-only` -+ * property of the libblkio driver in blkio_file_open(). 
-+ */ -+ fd = qemu_open(path, O_RDWR, errp); - if (fd < 0) { - return -EINVAL; - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch b/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch deleted file mode 100644 index 8a6f72b..0000000 --- a/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch +++ /dev/null @@ -1,54 +0,0 @@ -From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 25 Jul 2023 12:37:44 +0200 -Subject: [PATCH 01/14] block/blkio: enable the completion eventfd - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s) - -Until libblkio 1.3.0, virtio-blk drivers had completion eventfd -notifications enabled from the start, but from the next releases -this is no longer the case, so we have to explicitly enable them. - -In fact, the libblkio documentation says they could be disabled, -so we should always enable them at the start if we want to be -sure to get completion eventfd notifications: - - By default, the driver might not generate completion events for - requests so it is necessary to explicitly enable the completion - file descriptor before use: - - void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable); - -I discovered this while trying a development version of libblkio: -the guest kernel hangs during boot, while probing the device. 
- -Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") -Signed-off-by: Stefano Garzarella -Message-id: 20230725103744.77343-1-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block/blkio.c b/block/blkio.c -index afcec359f2..3ea9841bd8 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - QLIST_INIT(&s->bounce_bufs); - s->blkioq = blkio_get_queue(s->blkio, 0); - s->completion_fd = blkioq_get_completion_fd(s->blkioq); -+ blkioq_set_completion_fd_enabled(s->blkioq, true); - - blkio_attach_aio_context(bs, bdrv_get_aio_context(bs)); - return 0; --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch b/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch deleted file mode 100644 index f4d6e3c..0000000 --- a/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch +++ /dev/null @@ -1,67 +0,0 @@ -From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:19 +0200 -Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd` - setting fails - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s) - -qemu_open() fails if called with an unix domain socket in this way: - -blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address - -Since virtio-blk-vhost-user does not support fd passing, let`s always fall back -on 
using `path` if we fail the fd passing. - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Reported-by: Qing Wang -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-4-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 93a8f8fc5c..eef80e9ce5 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - * In order to open the device read-only, we are using the `read-only` - * property of the libblkio driver in blkio_file_open(). - */ -- fd = qemu_open(path, O_RDWR, errp); -+ fd = qemu_open(path, O_RDWR, NULL); - if (fd < 0) { -- return -EINVAL; -+ fd_supported = false; -+ } else { -+ ret = blkio_set_int(s->blkio, "fd", fd); -+ if (ret < 0) { -+ fd_supported = false; -+ qemu_close(fd); -+ } - } -+ } - -- ret = blkio_set_int(s->blkio, "fd", fd); -- if (ret < 0) { -- error_setg_errno(errp, -ret, "failed to set fd: %s", -- blkio_get_error_msg()); -- qemu_close(fd); -- return ret; -- } -- } else { -+ if (!fd_supported) { - ret = blkio_set_str(s->blkio, "path", path); - if (ret < 0) { - error_setg_errno(errp, -ret, "failed to set path: %s", --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch b/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch deleted file mode 100644 index 1c89a0b..0000000 --- a/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 4 Jul 2023 14:34:36 +0200 -Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 181: 
block/blkio: fix module_block.py parsing -RH-Bugzilla: 2213317 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm) - -When QEMU is built with --enable-modules, the module_block.py script -parses block/*.c to find block drivers that are built as modules. The -script generates a table of block drivers called block_driver_modules[]. -This table is used for block driver module loading. - -The blkio.c driver uses macros to define its BlockDriver structs. This -was done to avoid code duplication but the module_block.py script is -unable to parse the macro. The result is that libblkio-based block -drivers can be built as modules but will not be found at runtime. - -One fix is to make the module_block.py script or build system fancier so -it can parse C macros (e.g. by parsing the preprocessed source code). I -chose not to do this because it raises the complexity of the build, -making future issues harder to debug. - -Keep things simple: use the macro to avoid duplicating BlockDriver -function pointers but define .format_name and .protocol_name manually -for each BlockDriver. This way the module_block.py is able to parse the -code. - -Also get rid of the block driver name macros (e.g. DRIVER_IO_URING) -because module_block.py cannot parse them either. - -Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") -Reported-by: Qing Wang -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefano Garzarella -Message-id: 20230704123436.187761-1-stefanha@redhat.com -Cc: Stefano Garzarella -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9) - -Conflicts: -- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to - blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback. 
- -Signed-off-by: Stefan Hajnoczi ---- - block/blkio.c | 118 ++++++++++++++++++++++++++------------------------ - 1 file changed, 61 insertions(+), 57 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 6a6f20f923..afcec359f2 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -21,16 +21,6 @@ - - #include "block/block-io.h" - --/* -- * Keep the QEMU BlockDriver names identical to the libblkio driver names. -- * Using macros instead of typing out the string literals avoids typos. -- */ --#define DRIVER_IO_URING "io_uring" --#define DRIVER_NVME_IO_URING "nvme-io_uring" --#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci" --#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user" --#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa" -- - /* - * Allocated bounce buffers are kept in a list sorted by buffer address. - */ -@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - return ret; - } - -- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) { -+ if (strcmp(blkio_driver, "io_uring") == 0) { - ret = blkio_io_uring_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) { -+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { - ret = blkio_nvme_io_uring(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); - } else { - g_assert_not_reached(); -@@ -1027,50 +1017,64 @@ static 
void blkio_refresh_limits(BlockDriverState *bs, Error **errp) - * - truncate - */ - --#define BLKIO_DRIVER(name, ...) \ -- { \ -- .format_name = name, \ -- .protocol_name = name, \ -- .instance_size = sizeof(BDRVBlkioState), \ -- .bdrv_file_open = blkio_file_open, \ -- .bdrv_close = blkio_close, \ -- .bdrv_co_getlength = blkio_co_getlength, \ -- .bdrv_co_truncate = blkio_truncate, \ -- .bdrv_co_get_info = blkio_co_get_info, \ -- .bdrv_attach_aio_context = blkio_attach_aio_context, \ -- .bdrv_detach_aio_context = blkio_detach_aio_context, \ -- .bdrv_co_pdiscard = blkio_co_pdiscard, \ -- .bdrv_co_preadv = blkio_co_preadv, \ -- .bdrv_co_pwritev = blkio_co_pwritev, \ -- .bdrv_co_flush_to_disk = blkio_co_flush, \ -- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ -- .bdrv_co_io_unplug = blkio_co_io_unplug, \ -- .bdrv_refresh_limits = blkio_refresh_limits, \ -- .bdrv_register_buf = blkio_register_buf, \ -- .bdrv_unregister_buf = blkio_unregister_buf, \ -- __VA_ARGS__ \ -- } -- --static BlockDriver bdrv_io_uring = BLKIO_DRIVER( -- DRIVER_IO_URING, -- .bdrv_needs_filename = true, --); -- --static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER( -- DRIVER_NVME_IO_URING, --); -- --static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VFIO_PCI --); -+/* -+ * Do not include .format_name and .protocol_name because module_block.py -+ * does not parse macros in the source code. 
-+ */ -+#define BLKIO_DRIVER_COMMON \ -+ .instance_size = sizeof(BDRVBlkioState), \ -+ .bdrv_file_open = blkio_file_open, \ -+ .bdrv_close = blkio_close, \ -+ .bdrv_co_getlength = blkio_co_getlength, \ -+ .bdrv_co_truncate = blkio_truncate, \ -+ .bdrv_co_get_info = blkio_co_get_info, \ -+ .bdrv_attach_aio_context = blkio_attach_aio_context, \ -+ .bdrv_detach_aio_context = blkio_detach_aio_context, \ -+ .bdrv_co_pdiscard = blkio_co_pdiscard, \ -+ .bdrv_co_preadv = blkio_co_preadv, \ -+ .bdrv_co_pwritev = blkio_co_pwritev, \ -+ .bdrv_co_flush_to_disk = blkio_co_flush, \ -+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ -+ .bdrv_co_io_unplug = blkio_co_io_unplug, \ -+ .bdrv_refresh_limits = blkio_refresh_limits, \ -+ .bdrv_register_buf = blkio_register_buf, \ -+ .bdrv_unregister_buf = blkio_unregister_buf, - --static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VHOST_USER --); -+/* -+ * Use the same .format_name and .protocol_name as the libblkio driver name for -+ * consistency. 
-+ */ - --static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VHOST_VDPA --); -+static BlockDriver bdrv_io_uring = { -+ .format_name = "io_uring", -+ .protocol_name = "io_uring", -+ .bdrv_needs_filename = true, -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_nvme_io_uring = { -+ .format_name = "nvme-io_uring", -+ .protocol_name = "nvme-io_uring", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vfio_pci = { -+ .format_name = "virtio-blk-vfio-pci", -+ .protocol_name = "virtio-blk-vfio-pci", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vhost_user = { -+ .format_name = "virtio-blk-vhost-user", -+ .protocol_name = "virtio-blk-vhost-user", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vhost_vdpa = { -+ .format_name = "virtio-blk-vhost-vdpa", -+ .protocol_name = "virtio-blk-vhost-vdpa", -+ BLKIO_DRIVER_COMMON -+}; - - static void bdrv_blkio_init(void) - { --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch b/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch deleted file mode 100644 index e3ec1ee..0000000 --- a/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:17 +0200 -Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers - functions - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s) - -This is in preparation for the next patch, where for virtio-blk -drivers we need to handle the failure of blkio_connect(). 
- -Let's also rename the *_open() functions to *_connect() to make -the code reflect the changes applied. - -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-2-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 67 ++++++++++++++++++++++++++++++--------------------- - 1 file changed, 40 insertions(+), 27 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 5a82c6cb1a..85d1eed5fb 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size) - } - } - --static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, -- Error **errp) -+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *filename = qdict_get_str(options, "filename"); - BDRVBlkioState *s = bs->opaque; -@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, - } - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - return 0; - } - --static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, -- Error **errp) -+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, - return -EINVAL; - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - return 0; - } - --static int blkio_virtio_blk_common_open(BlockDriverState *bs, -- QDict *options, int flags, 
Error **errp) -+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - } - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - qdict_del(options, "path"); - - return 0; -@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - return ret; - } - -- if (strcmp(blkio_driver, "io_uring") == 0) { -- ret = blkio_io_uring_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { -- ret = blkio_nvme_io_uring(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else { -- g_assert_not_reached(); -- } -- if (ret < 0) { -- blkio_destroy(&s->blkio); -- return ret; -- } -- - if (!(flags & BDRV_O_RDWR)) { - ret = blkio_set_bool(s->blkio, "read-only", true); - if (ret < 0) { -@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - } - } - -- ret = blkio_connect(s->blkio); -+ if (strcmp(blkio_driver, "io_uring") == 0) { -+ ret = blkio_io_uring_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { -+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, 
"virtio-blk-vhost-user") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else { -+ g_assert_not_reached(); -+ } - if (ret < 0) { -- error_setg_errno(errp, -ret, "blkio_connect failed: %s", -- blkio_get_error_msg()); - blkio_destroy(&s->blkio); - return ret; - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch b/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch deleted file mode 100644 index 5ec9e0b..0000000 --- a/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch +++ /dev/null @@ -1,85 +0,0 @@ -From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:18 +0200 -Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using - `fd` - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s) - -libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa -driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use -qemu_open() to support fd passing for virtio-blk") we are using -`blkio_get_int(..., "fd")` to check if the "fd" property is supported -for all the virtio-blk-* driver. - -Unfortunately that property is also available for those driver that do -not support it, such as virtio-blk-vhost-user. - -So, `blkio_get_int()` is not enough to check whether the driver supports -the `fd` property or not. 
This is because the virito-blk common libblkio -driver only checks whether or not `fd` is set during `blkio_connect()` -and fails with -EINVAL for those transports that do not support it -(all except vhost-vdpa for now). - -So let's handle the `blkio_connect()` failure, retrying it using `path` -directly. - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Suggested-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-3-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 29 +++++++++++++++++++++++++++++ - 1 file changed, 29 insertions(+) - -diff --git a/block/blkio.c b/block/blkio.c -index 85d1eed5fb..93a8f8fc5c 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - } - - ret = blkio_connect(s->blkio); -+ /* -+ * If the libblkio driver doesn't support the `fd` property, blkio_connect() -+ * will fail with -EINVAL. So let's try calling blkio_connect() again by -+ * directly setting `path`. -+ */ -+ if (fd_supported && ret == -EINVAL) { -+ qemu_close(fd); -+ -+ /* -+ * We need to clear the `fd` property we set previously by setting -+ * it to -1. 
-+ */ -+ ret = blkio_set_int(s->blkio, "fd", -1); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set fd: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ -+ ret = blkio_set_str(s->blkio, "path", path); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set path: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ -+ ret = blkio_connect(s->blkio); -+ } -+ - if (ret < 0) { - error_setg_errno(errp, -ret, "blkio_connect failed: %s", - blkio_get_error_msg()); --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch b/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch deleted file mode 100644 index c6e1cd8..0000000 --- a/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:20 +0200 -Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd - support - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s) - -Setting the `fd` property fails with virtio-blk-* libblkio drivers -that do not support fd passing since -https://gitlab.com/libblkio/libblkio/-/merge_requests/208. - -Getting the `fd` property, on the other hand, always succeeds for -virtio-blk-* libblkio drivers even when they don't support fd passing. - -This patch switches to setting the `fd` property because it is a -better mechanism for probing fd passing support than getting the `fd` -property. 
- -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-5-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/blkio.c b/block/blkio.c -index eef80e9ce5..8defbf744f 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - return -EINVAL; - } - -- if (blkio_get_int(s->blkio, "fd", &fd) == 0) { -+ if (blkio_set_int(s->blkio, "fd", -1) == 0) { - fd_supported = true; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch b/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch deleted file mode 100644 index 3b32299..0000000 --- a/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch +++ /dev/null @@ -1,108 +0,0 @@ -From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 30 May 2023 09:19:40 +0200 -Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for - virtio-blk - -RH-Author: Stefano Garzarella -RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver -RH-Bugzilla: 2180076 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s) - -Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) supports the fd -passing. Let's expose this to the user, so the management layer -can pass the file descriptor of an already opened path. - -If the libblkio virtio-blk driver supports fd passing, let's always -use qemu_open() to open the `path`, so we can handle fd passing -from the management layer through the "/dev/fdset/N" special path. 
- -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230530071941.8954-2-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 44 insertions(+), 9 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 0cdc99a729..6a6f20f923 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -- int ret; -+ bool fd_supported = false; -+ int fd, ret; - - if (!path) { - error_setg(errp, "missing 'path' option"); - return -EINVAL; - } - -- ret = blkio_set_str(s->blkio, "path", path); -- qdict_del(options, "path"); -- if (ret < 0) { -- error_setg_errno(errp, -ret, "failed to set path: %s", -- blkio_get_error_msg()); -- return ret; -- } -- - if (!(flags & BDRV_O_NOCACHE)) { - error_setg(errp, "cache.direct=off is not supported"); - return -EINVAL; - } -+ -+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) { -+ fd_supported = true; -+ } -+ -+ /* -+ * If the libblkio driver supports fd passing, let's always use qemu_open() -+ * to open the `path`, so we can handle fd passing from the management -+ * layer through the "/dev/fdset/N" special path. 
-+ */ -+ if (fd_supported) { -+ int open_flags; -+ -+ if (flags & BDRV_O_RDWR) { -+ open_flags = O_RDWR; -+ } else { -+ open_flags = O_RDONLY; -+ } -+ -+ fd = qemu_open(path, open_flags, errp); -+ if (fd < 0) { -+ return -EINVAL; -+ } -+ -+ ret = blkio_set_int(s->blkio, "fd", fd); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set fd: %s", -+ blkio_get_error_msg()); -+ qemu_close(fd); -+ return ret; -+ } -+ } else { -+ ret = blkio_set_str(s->blkio, "path", path); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set path: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ } -+ -+ qdict_del(options, "path"); -+ - return 0; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch b/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch deleted file mode 100644 index b6eebf3..0000000 --- a/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch +++ /dev/null @@ -1,121 +0,0 @@ -From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 1 May 2023 13:34:43 -0400 -Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by - default - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm) - -reader_count() is a performance bottleneck because the global -aio_context_list_lock mutex causes thread contention. Put this debugging -assertion behind a new ./configure --enable-debug-graph-lock option and -disable it by default. - -The --enable-debug-graph-lock option is also enabled by the more general ---enable-debug option. 
- -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230501173443.153062-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf) -Signed-off-by: Kevin Wolf ---- - block/graph-lock.c | 3 +++ - configure | 1 + - meson.build | 2 ++ - meson_options.txt | 2 ++ - scripts/meson-buildoptions.sh | 4 ++++ - 5 files changed, 12 insertions(+) - -diff --git a/block/graph-lock.c b/block/graph-lock.c -index 454c31e691..259a7a0bde 100644 ---- a/block/graph-lock.c -+++ b/block/graph-lock.c -@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void) - - void assert_bdrv_graph_readable(void) - { -+ /* reader_count() is slow due to aio_context_list_lock lock contention */ -+#ifdef CONFIG_DEBUG_GRAPH_LOCK - assert(qemu_in_main_thread() || reader_count()); -+#endif - } - - void assert_bdrv_graph_writable(void) -diff --git a/configure b/configure -index 800b5850f4..a62a3e6be9 100755 ---- a/configure -+++ b/configure -@@ -806,6 +806,7 @@ for opt do - --enable-debug) - # Enable debugging options that aren't excessively noisy - debug_tcg="yes" -+ meson_option_parse --enable-debug-graph-lock "" - meson_option_parse --enable-debug-mutex "" - meson_option_add -Doptimization=0 - fortify_source="no" -diff --git a/meson.build b/meson.build -index c44d05a13f..d964e741e7 100644 ---- a/meson.build -+++ b/meson.build -@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool - have_coroutine_pool = false - endif - config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) -+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) - config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) - config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) - config_host_data.set('CONFIG_GPROF', get_option('gprof')) -@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')} - summary_info += {'static build': 
config_host.has_key('CONFIG_STATIC')} - summary_info += {'malloc trim support': has_malloc_trim} - summary_info += {'membarrier': have_membarrier} -+summary_info += {'debug graph lock': get_option('debug_graph_lock')} - summary_info += {'debug stack usage': get_option('debug_stack_usage')} - summary_info += {'mutex debugging': get_option('debug_mutex')} - summary_info += {'memory allocator': get_option('malloc')} -diff --git a/meson_options.txt b/meson_options.txt -index fc9447d267..bc857fe68b 100644 ---- a/meson_options.txt -+++ b/meson_options.txt -@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false, - description: 'dummy RNG, avoid using /dev/(u)random and getrandom()') - option('coroutine_pool', type: 'boolean', value: true, - description: 'coroutine freelist (better performance)') -+option('debug_graph_lock', type: 'boolean', value: false, -+ description: 'graph lock debugging support') - option('debug_mutex', type: 'boolean', value: false, - description: 'mutex debugging support') - option('debug_stack_usage', type: 'boolean', value: false, -diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh -index 009fab1515..30e1f25259 100644 ---- a/scripts/meson-buildoptions.sh -+++ b/scripts/meson-buildoptions.sh -@@ -21,6 +21,8 @@ meson_options_help() { - printf "%s\n" ' QEMU' - printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' - printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' -+ printf "%s\n" ' --enable-debug-graph-lock' -+ printf "%s\n" ' graph lock debugging support' - printf "%s\n" ' --enable-debug-mutex mutex debugging support' - printf "%s\n" ' --enable-debug-stack-usage' - printf "%s\n" ' measure coroutine stack usage' -@@ -249,6 +251,8 @@ _meson_option_parse() { - --datadir=*) quote_sh "-Ddatadir=$2" ;; - --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;; - --disable-dbus-display) printf "%s" -Ddbus_display=disabled ;; -+ --enable-debug-graph-lock) printf "%s" 
-Ddebug_graph_lock=true ;; -+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;; - --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;; - --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;; - --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;; --- -2.39.3 - diff --git a/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch b/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch new file mode 100644 index 0000000..c2b9c47 --- /dev/null +++ b/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch @@ -0,0 +1,330 @@ +From a67edfb4b591acdffc5b4987601a30224376996f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 27 May 2024 11:58:50 -0400 +Subject: [PATCH 4/5] block/crypto: create ciphers on demand +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 251: block/crypto: create ciphers on demand +RH-Jira: RHEL-36159 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] 22a4c87fef774cad98a6f5a79f27df50a208013d (stefanha/centos-stream-qemu-kvm) + +Ciphers are pre-allocated by qcrypto_block_init_cipher() depending on +the given number of threads. The -device +virtio-blk-pci,iothread-vq-mapping= feature allows users to assign +multiple IOThreads to a virtio-blk device, but the association between +the virtio-blk device and the block driver happens after the block +driver is already open. + +When the number of threads given to qcrypto_block_init_cipher() is +smaller than the actual number of threads at runtime, the +block->n_free_ciphers > 0 assertion in qcrypto_block_pop_cipher() can +fail. + +Get rid of qcrypto_block_init_cipher() n_thread's argument and allocate +ciphers on demand. + +Reported-by: Qing Wang +Buglink: https://issues.redhat.com/browse/RHEL-36159 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240527155851.892885-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Acked-by: Daniel P. 
Berrangé +Signed-off-by: Kevin Wolf +(cherry picked from commit af206c284e4c1b17cdfb0f17e898b288c0fc1751) +Signed-off-by: Stefan Hajnoczi +--- + crypto/block-luks.c | 3 +- + crypto/block-qcow.c | 2 +- + crypto/block.c | 111 ++++++++++++++++++++++++++------------------ + crypto/blockpriv.h | 12 +++-- + 4 files changed, 78 insertions(+), 50 deletions(-) + +diff --git a/crypto/block-luks.c b/crypto/block-luks.c +index 3ee928fb5a..3357852c0a 100644 +--- a/crypto/block-luks.c ++++ b/crypto/block-luks.c +@@ -1262,7 +1262,6 @@ qcrypto_block_luks_open(QCryptoBlock *block, + luks->cipher_mode, + masterkey, + luks->header.master_key_len, +- n_threads, + errp) < 0) { + goto fail; + } +@@ -1456,7 +1455,7 @@ qcrypto_block_luks_create(QCryptoBlock *block, + /* Setup the block device payload encryption objects */ + if (qcrypto_block_init_cipher(block, luks_opts.cipher_alg, + luks_opts.cipher_mode, masterkey, +- luks->header.master_key_len, 1, errp) < 0) { ++ luks->header.master_key_len, errp) < 0) { + goto error; + } + +diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c +index 4d7cf36a8f..02305058e3 100644 +--- a/crypto/block-qcow.c ++++ b/crypto/block-qcow.c +@@ -75,7 +75,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block, + ret = qcrypto_block_init_cipher(block, QCRYPTO_CIPHER_ALG_AES_128, + QCRYPTO_CIPHER_MODE_CBC, + keybuf, G_N_ELEMENTS(keybuf), +- n_threads, errp); ++ errp); + if (ret < 0) { + ret = -ENOTSUP; + goto fail; +diff --git a/crypto/block.c b/crypto/block.c +index 506ea1d1a3..ba6d1cebc7 100644 +--- a/crypto/block.c ++++ b/crypto/block.c +@@ -20,6 +20,7 @@ + + #include "qemu/osdep.h" + #include "qapi/error.h" ++#include "qemu/lockable.h" + #include "blockpriv.h" + #include "block-qcow.h" + #include "block-luks.h" +@@ -57,6 +58,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); + ++ qemu_mutex_init(&block->mutex); ++ + block->format = options->format; + + if (options->format >= 
G_N_ELEMENTS(qcrypto_block_drivers) || +@@ -76,8 +79,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + return NULL; + } + +- qemu_mutex_init(&block->mutex); +- + return block; + } + +@@ -92,6 +93,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); + ++ qemu_mutex_init(&block->mutex); ++ + block->format = options->format; + + if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) || +@@ -111,8 +114,6 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, + return NULL; + } + +- qemu_mutex_init(&block->mutex); +- + return block; + } + +@@ -227,37 +228,42 @@ QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block) + * This function is used only in test with one thread (it's safe to skip + * pop/push interface), so it's enough to assert it here: + */ +- assert(block->n_ciphers <= 1); +- return block->ciphers ? block->ciphers[0] : NULL; ++ assert(block->max_free_ciphers <= 1); ++ return block->free_ciphers ? 
block->free_ciphers[0] : NULL; + } + + +-static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block) ++static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block, ++ Error **errp) + { +- QCryptoCipher *cipher; +- +- qemu_mutex_lock(&block->mutex); +- +- assert(block->n_free_ciphers > 0); +- block->n_free_ciphers--; +- cipher = block->ciphers[block->n_free_ciphers]; +- +- qemu_mutex_unlock(&block->mutex); ++ /* Usually there is a free cipher available */ ++ WITH_QEMU_LOCK_GUARD(&block->mutex) { ++ if (block->n_free_ciphers > 0) { ++ block->n_free_ciphers--; ++ return block->free_ciphers[block->n_free_ciphers]; ++ } ++ } + +- return cipher; ++ /* Otherwise allocate a new cipher */ ++ return qcrypto_cipher_new(block->alg, block->mode, block->key, ++ block->nkey, errp); + } + + + static void qcrypto_block_push_cipher(QCryptoBlock *block, + QCryptoCipher *cipher) + { +- qemu_mutex_lock(&block->mutex); ++ QEMU_LOCK_GUARD(&block->mutex); + +- assert(block->n_free_ciphers < block->n_ciphers); +- block->ciphers[block->n_free_ciphers] = cipher; +- block->n_free_ciphers++; ++ if (block->n_free_ciphers == block->max_free_ciphers) { ++ block->max_free_ciphers++; ++ block->free_ciphers = g_renew(QCryptoCipher *, ++ block->free_ciphers, ++ block->max_free_ciphers); ++ } + +- qemu_mutex_unlock(&block->mutex); ++ block->free_ciphers[block->n_free_ciphers] = cipher; ++ block->n_free_ciphers++; + } + + +@@ -265,24 +271,31 @@ int qcrypto_block_init_cipher(QCryptoBlock *block, + QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, +- size_t n_threads, Error **errp) ++ Error **errp) + { +- size_t i; ++ QCryptoCipher *cipher; + +- assert(!block->ciphers && !block->n_ciphers && !block->n_free_ciphers); ++ assert(!block->free_ciphers && !block->max_free_ciphers && ++ !block->n_free_ciphers); + +- block->ciphers = g_new0(QCryptoCipher *, n_threads); ++ /* Stash away cipher parameters for qcrypto_block_pop_cipher() */ ++ block->alg = alg; 
++ block->mode = mode; ++ block->key = g_memdup2(key, nkey); ++ block->nkey = nkey; + +- for (i = 0; i < n_threads; i++) { +- block->ciphers[i] = qcrypto_cipher_new(alg, mode, key, nkey, errp); +- if (!block->ciphers[i]) { +- qcrypto_block_free_cipher(block); +- return -1; +- } +- block->n_ciphers++; +- block->n_free_ciphers++; ++ /* ++ * Create a new cipher to validate the parameters now. This reduces the ++ * chance of cipher creation failing at I/O time. ++ */ ++ cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ g_free(block->key); ++ block->key = NULL; ++ return -1; + } + ++ qcrypto_block_push_cipher(block, cipher); + return 0; + } + +@@ -291,19 +304,23 @@ void qcrypto_block_free_cipher(QCryptoBlock *block) + { + size_t i; + +- if (!block->ciphers) { ++ g_free(block->key); ++ block->key = NULL; ++ ++ if (!block->free_ciphers) { + return; + } + +- assert(block->n_ciphers == block->n_free_ciphers); ++ /* All popped ciphers were eventually pushed back */ ++ assert(block->n_free_ciphers == block->max_free_ciphers); + +- for (i = 0; i < block->n_ciphers; i++) { +- qcrypto_cipher_free(block->ciphers[i]); ++ for (i = 0; i < block->max_free_ciphers; i++) { ++ qcrypto_cipher_free(block->free_ciphers[i]); + } + +- g_free(block->ciphers); +- block->ciphers = NULL; +- block->n_ciphers = block->n_free_ciphers = 0; ++ g_free(block->free_ciphers); ++ block->free_ciphers = NULL; ++ block->max_free_ciphers = block->n_free_ciphers = 0; + } + + QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block) +@@ -311,7 +328,7 @@ QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block) + /* ivgen should be accessed under mutex. 
However, this function is used only + * in test with one thread, so it's enough to assert it here: + */ +- assert(block->n_ciphers <= 1); ++ assert(block->max_free_ciphers <= 1); + return block->ivgen; + } + +@@ -446,7 +463,10 @@ int qcrypto_block_decrypt_helper(QCryptoBlock *block, + Error **errp) + { + int ret; +- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block); ++ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ return -1; ++ } + + ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen, + &block->mutex, sectorsize, offset, buf, +@@ -465,7 +485,10 @@ int qcrypto_block_encrypt_helper(QCryptoBlock *block, + Error **errp) + { + int ret; +- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block); ++ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ return -1; ++ } + + ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen, + &block->mutex, sectorsize, offset, buf, +diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h +index 836f3b4726..4bf6043d5d 100644 +--- a/crypto/blockpriv.h ++++ b/crypto/blockpriv.h +@@ -32,8 +32,14 @@ struct QCryptoBlock { + const QCryptoBlockDriver *driver; + void *opaque; + +- QCryptoCipher **ciphers; +- size_t n_ciphers; ++ /* Cipher parameters */ ++ QCryptoCipherAlgorithm alg; ++ QCryptoCipherMode mode; ++ uint8_t *key; ++ size_t nkey; ++ ++ QCryptoCipher **free_ciphers; ++ size_t max_free_ciphers; + size_t n_free_ciphers; + QCryptoIVGen *ivgen; + QemuMutex mutex; +@@ -130,7 +136,7 @@ int qcrypto_block_init_cipher(QCryptoBlock *block, + QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, +- size_t n_threads, Error **errp); ++ Error **errp); + + void qcrypto_block_free_cipher(QCryptoBlock *block); + +-- +2.39.3 + diff --git a/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch b/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch deleted file mode 100644 index 4173648..0000000 
--- a/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit ef56ffbdd6b0605dc1e305611287b948c970e236 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:08 2023 -0400 - - checkpatch: add qemu_bh_new/aio_bh_new checks - - Advise authors to use the _guarded versions of the APIs, instead. - - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-4-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - scripts/checkpatch.pl | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl -index d768171dcf..eeaec436eb 100755 ---- a/scripts/checkpatch.pl -+++ b/scripts/checkpatch.pl -@@ -2865,6 +2865,14 @@ sub process { - if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { - ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr); - } -+# recommend qemu_bh_new_guarded instead of qemu_bh_new -+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { -+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); -+ } -+# recommend aio_bh_new_guarded instead of aio_bh_new -+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { -+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . 
$herecurr); -+ } - # check for module_init(), use category-specific init macros explicitly please - if ($line =~ /^module_init\s*\(/) { - ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr); --- -2.39.3 - diff --git a/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch b/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch new file mode 100644 index 0000000..785b437 --- /dev/null +++ b/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch @@ -0,0 +1,90 @@ +From 0f0a3a860a07addea21a0282556a5022b9cb8b2c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:35 -0500 +Subject: [PATCH 011/100] confidential guest support: Add kvm_init() and + kvm_reset() in class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [11/91] 21d2178178bf181a8e4d0b051f64bd983f0d0cf1 (bonzini/rhel-qemu-kvm) + +Different confidential VMs in different architectures all have the same +needs to do their specific initialization (and maybe resetting) stuffs +with KVM. Currently each of them exposes individual *_kvm_init() +functions and let machine code or kvm code to call it. + +To facilitate the introduction of confidential guest technology from +different x86 vendors, add two virtual functions, kvm_init() and kvm_reset() +in ConfidentialGuestSupportClass, and expose two helpers functions for +invodking them. 
+ +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 41a605944e3fecae43ca18ded95ec31f28e0c7fe) +Signed-off-by: Paolo Bonzini +--- + include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index ba2dd4b5df..e5b188cffb 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -23,7 +23,10 @@ + #include "qom/object.h" + + #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" +-OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) ++OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, ++ ConfidentialGuestSupportClass, ++ CONFIDENTIAL_GUEST_SUPPORT) ++ + + struct ConfidentialGuestSupport { + Object parent; +@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport { + + typedef struct ConfidentialGuestSupportClass { + ObjectClass parent; ++ ++ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); ++ int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); + } ConfidentialGuestSupportClass; + ++static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, ++ Error **errp) ++{ ++ ConfidentialGuestSupportClass *klass; ++ ++ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); ++ if (klass->kvm_init) { ++ return klass->kvm_init(cgs, errp); ++ } ++ ++ return 0; ++} ++ ++static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs, ++ Error **errp) ++{ ++ ConfidentialGuestSupportClass *klass; ++ ++ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); ++ if (klass->kvm_reset) { ++ return klass->kvm_reset(cgs, errp); ++ } ++ ++ return 0; ++} ++ + #endif /* !CONFIG_USER_ONLY */ + + #endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ +-- +2.39.3 + diff --git 
a/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch b/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch new file mode 100644 index 0000000..b5fcef5 --- /dev/null +++ b/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch @@ -0,0 +1,228 @@ +From 117486e0820f135f191e19f8ebb8838a98b121c6 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 27 May 2024 11:58:51 -0400 +Subject: [PATCH 5/5] crypto/block: drop qcrypto_block_open() n_threads + argument +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 251: block/crypto: create ciphers on demand +RH-Jira: RHEL-36159 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] 68290935b174b1f2b76aa857a926da9011e54abe (stefanha/centos-stream-qemu-kvm) + +The n_threads argument is no longer used since the previous commit. +Remove it. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240527155851.892885-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Acked-by: Daniel P. 
Berrangé +Signed-off-by: Kevin Wolf +(cherry picked from commit 3ab0f063e58ed9224237d69c4211ca83335164c4) +Signed-off-by: Stefan Hajnoczi +--- + block/crypto.c | 1 - + block/qcow.c | 2 +- + block/qcow2.c | 5 ++--- + crypto/block-luks.c | 1 - + crypto/block-qcow.c | 6 ++---- + crypto/block.c | 3 +-- + crypto/blockpriv.h | 1 - + include/crypto/block.h | 2 -- + tests/unit/test-crypto-block.c | 4 ---- + 9 files changed, 6 insertions(+), 19 deletions(-) + +diff --git a/block/crypto.c b/block/crypto.c +index 21eed909c1..4eed3ffa6a 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -363,7 +363,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, + block_crypto_read_func, + bs, + cflags, +- 1, + errp); + + if (!crypto->block) { +diff --git a/block/qcow.c b/block/qcow.c +index ca8e1d5ec8..c2f89db055 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -211,7 +211,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(crypto_opts, "encrypt.", +- NULL, NULL, cflags, 1, errp); ++ NULL, NULL, cflags, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; +diff --git a/block/qcow2.c b/block/qcow2.c +index 0e8b2f7518..0ebd455dc8 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -321,7 +321,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", + qcow2_crypto_hdr_read_func, +- bs, cflags, QCOW2_MAX_THREADS, errp); ++ bs, cflags, errp); + if (!s->crypto) { + return -EINVAL; + } +@@ -1707,8 +1707,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", +- NULL, NULL, cflags, +- QCOW2_MAX_THREADS, errp); ++ NULL, NULL, cflags, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; +diff --git a/crypto/block-luks.c b/crypto/block-luks.c +index 3357852c0a..5b777c15d3 100644 
+--- a/crypto/block-luks.c ++++ b/crypto/block-luks.c +@@ -1189,7 +1189,6 @@ qcrypto_block_luks_open(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + QCryptoBlockLUKS *luks = NULL; +diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c +index 02305058e3..42e9556e42 100644 +--- a/crypto/block-qcow.c ++++ b/crypto/block-qcow.c +@@ -44,7 +44,6 @@ qcrypto_block_qcow_has_format(const uint8_t *buf G_GNUC_UNUSED, + static int + qcrypto_block_qcow_init(QCryptoBlock *block, + const char *keysecret, +- size_t n_threads, + Error **errp) + { + char *password; +@@ -100,7 +99,6 @@ qcrypto_block_qcow_open(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + if (flags & QCRYPTO_BLOCK_OPEN_NO_IO) { +@@ -115,7 +113,7 @@ qcrypto_block_qcow_open(QCryptoBlock *block, + return -1; + } + return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, +- n_threads, errp); ++ errp); + } + } + +@@ -135,7 +133,7 @@ qcrypto_block_qcow_create(QCryptoBlock *block, + return -1; + } + /* QCow2 has no special header, since everything is hardwired */ +- return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, 1, errp); ++ return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, errp); + } + + +diff --git a/crypto/block.c b/crypto/block.c +index ba6d1cebc7..3bcc4270c3 100644 +--- a/crypto/block.c ++++ b/crypto/block.c +@@ -53,7 +53,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); +@@ -73,7 +72,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + block->driver = qcrypto_block_drivers[options->format]; + + if (block->driver->open(block, options, optprefix, +- readfunc, opaque, flags, 
n_threads, errp) < 0) ++ readfunc, opaque, flags, errp) < 0) + { + g_free(block); + return NULL; +diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h +index 4bf6043d5d..b8f77cb5eb 100644 +--- a/crypto/blockpriv.h ++++ b/crypto/blockpriv.h +@@ -59,7 +59,6 @@ struct QCryptoBlockDriver { + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp); + + int (*create)(QCryptoBlock *block, +diff --git a/include/crypto/block.h b/include/crypto/block.h +index 92e823c9f2..5b5d039800 100644 +--- a/include/crypto/block.h ++++ b/include/crypto/block.h +@@ -76,7 +76,6 @@ typedef enum { + * @readfunc: callback for reading data from the volume + * @opaque: data to pass to @readfunc + * @flags: bitmask of QCryptoBlockOpenFlags values +- * @n_threads: allow concurrent I/O from up to @n_threads threads + * @errp: pointer to a NULL-initialized error object + * + * Create a new block encryption object for an existing +@@ -113,7 +112,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp); + + typedef enum { +diff --git a/tests/unit/test-crypto-block.c b/tests/unit/test-crypto-block.c +index 6cfc817a92..42cfab6067 100644 +--- a/tests/unit/test-crypto-block.c ++++ b/tests/unit/test-crypto-block.c +@@ -303,7 +303,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + 0, +- 1, + NULL); + g_assert(blk == NULL); + +@@ -312,7 +311,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + QCRYPTO_BLOCK_OPEN_NO_IO, +- 1, + &error_abort); + + g_assert(qcrypto_block_get_cipher(blk) == NULL); +@@ -327,7 +325,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + 0, +- 1, + &error_abort); + g_assert(blk); + +@@ -384,7 +381,6 @@ test_luks_bad_header(gconstpointer data) + test_block_read_func, + &buf, + 0, +- 1, + &err); + g_assert(!blk); 
+ g_assert(err); +-- +2.39.3 + diff --git a/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch b/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch deleted file mode 100644 index 77086e5..0000000 --- a/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:32 +0200 -Subject: [PATCH 02/21] graph-lock: Disable locking for now - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm) - -In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They -come from callers that hold an AioContext lock, which is not allowed -during polling. In theory, we could temporarily release the lock, but -callers are inconsistent about whether they hold a lock, and if they do, -some are also confused about which one they hold. While all of this is -fixable, it's not trivial, and the best course of action for 8.0.1 is -probably just disabling the graph locking code temporarily. - -We don't currently rely on graph locking yet. It is supposed to replace -the AioContext lock eventually to enable multiqueue support, but as long -as we still have the AioContext lock, it is sufficient without the graph -lock. Once the AioContext lock goes away, the deadlock doesn't exist any -more either and this commit can be reverted. (Of course, it can also be -reverted while the AioContext lock still exists if the callers have been -fixed.) 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-2-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d) -Signed-off-by: Kevin Wolf ---- - block/graph-lock.c | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/block/graph-lock.c b/block/graph-lock.c -index 259a7a0bde..2490926c90 100644 ---- a/block/graph-lock.c -+++ b/block/graph-lock.c -@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; - /* Protects the list of aiocontext and orphaned_reader_count */ - static QemuMutex aio_context_list_lock; - -+#if 0 - /* Written and read with atomic operations. */ - static int has_writer; -+#endif - - /* - * A reader coroutine could move from an AioContext to another. -@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) - g_free(ctx->bdrv_graph); - } - -+#if 0 - static uint32_t reader_count(void) - { - BdrvGraphRWlock *brdv_graph; -@@ -105,10 +108,17 @@ static uint32_t reader_count(void) - assert((int32_t)rd >= 0); - return rd; - } -+#endif - - void bdrv_graph_wrlock(void) - { - GLOBAL_STATE_CODE(); -+ /* -+ * TODO Some callers hold an AioContext lock when this is called, which -+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or -+ * AioContext locks are gone). 
-+ */ -+#if 0 - assert(!qatomic_read(&has_writer)); - - /* Make sure that constantly arriving new I/O doesn't cause starvation */ -@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) - } while (reader_count() >= 1); - - bdrv_drain_all_end(); -+#endif - } - - void bdrv_graph_wrunlock(void) - { - GLOBAL_STATE_CODE(); -+#if 0 - QEMU_LOCK_GUARD(&aio_context_list_lock); - assert(qatomic_read(&has_writer)); - -@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void) - - /* Wake up all coroutine that are waiting to read the graph */ - qemu_co_enter_all(&reader_queue, &aio_context_list_lock); -+#endif - } - - void coroutine_fn bdrv_graph_co_rdlock(void) - { -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - BdrvGraphRWlock *bdrv_graph; - bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - -@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void) - qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); - } - } -+#endif - } - - void coroutine_fn bdrv_graph_co_rdunlock(void) - { -+#if 0 - BdrvGraphRWlock *bdrv_graph; - bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - -@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) - if (qatomic_read(&has_writer)) { - aio_wait_kick(); - } -+#endif - } - - void bdrv_graph_rdlock_main_loop(void) -@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void) - void assert_bdrv_graph_readable(void) - { - /* reader_count() is slow due to aio_context_list_lock lock contention */ -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - #ifdef CONFIG_DEBUG_GRAPH_LOCK - assert(qemu_in_main_thread() || reader_count()); - #endif -+#endif - } - - void assert_bdrv_graph_writable(void) - { - assert(qemu_in_main_thread()); -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - assert(qatomic_read(&has_writer)); -+#endif - } --- -2.39.3 - diff --git a/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch b/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch deleted 
file mode 100644 index 67e702c..0000000 --- a/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch +++ /dev/null @@ -1,40 +0,0 @@ -From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Tue, 2 May 2023 15:51:53 +0530 -Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines - version 7.6 and above - -RH-Author: Ani Sinha -RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 -RH-Bugzilla: 1934134 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm) - -Please look at QEMU upstream commit -1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3") -This patch adapts the above change so that it applies to RHEL pc machines of -version 7.6 and newer. These are the machine types that are currently supported -in RHEL. Q35 machines are not affected. 
- -Signed-off-by: Ani Sinha ---- - hw/i386/pc_piix.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 4d5880e249..6c7be628e1 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m) - m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; - pcmc->default_nic_model = "e1000"; - pcmc->pci_root_uid = 0; -+ pcmc->resizable_acpi_blob = true; - m->default_display = "std"; - m->no_parallel = 1; - m->numa_mem_supported = true; --- -2.39.1 - diff --git a/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch b/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch deleted file mode 100644 index e06113a..0000000 --- a/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Wed, 29 Mar 2023 10:27:26 +0530 -Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines - older than version 2.3 - -RH-Author: Ani Sinha -RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 -RH-Bugzilla: 1934134 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm) - -i440fx machine versions 2.3 and newer supports dynamic ram -resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") . -Currently supported all q35 machine types (versions 2.4 and newer) supports -resizable RAM/ROM blocks.Therefore the warning generated when the ACPI table -size exceeds a pre-defined value does not apply to those machine versions. -Add a check limiting the warning message to only those machines that does not -support expandable ram blocks (that is, i440fx machines with version 2.2 -and older). 
- -Signed-off-by: Ani Sinha -Message-Id: <20230329045726.14028-1-anisinha@redhat.com> -Reviewed-by: Igor Mammedov -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074) ---- - hw/i386/acpi-build.c | 6 ++++-- - hw/i386/pc.c | 1 + - hw/i386/pc_piix.c | 1 + - include/hw/i386/pc.h | 3 +++ - 4 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index ec857a117e..9bc4d8a981 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) - int legacy_table_size = - ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, - ACPI_BUILD_ALIGN_SIZE); -- if (tables_blob->len > legacy_table_size) { -+ if ((tables_blob->len > legacy_table_size) && -+ !pcmc->resizable_acpi_blob) { - /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ - warn_report("ACPI table size %u exceeds %d bytes," - " migration may not work", -@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) - g_array_set_size(tables_blob, legacy_table_size); - } else { - /* Make sure we have a buffer in case we need to resize the tables. */ -- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { -+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) && -+ !pcmc->resizable_acpi_blob) { - /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. 
*/ - warn_report("ACPI table size %u exceeds %d bytes," - " migration may not work", -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index f216922cee..7db5a2348f 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->acpi_data_size = 0x20000 + 0x8000; - pcmc->pvh_enabled = true; - pcmc->kvmclock_create_always = true; -+ pcmc->resizable_acpi_blob = true; - assert(!mc->get_hotplug_handler); - mc->async_pf_vmexit_disable = false; - mc->get_hotplug_handler = pc_get_hotplug_handler; -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index fc704d783f..4d5880e249 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m) - compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len); - compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len); - pcmc->rsdp_in_ram = false; -+ pcmc->resizable_acpi_blob = false; - } - - DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index d218ad1628..2f514d13d8 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -130,6 +130,9 @@ struct PCMachineClass { - - /* create kvmclock device even when KVM PV features are not exposed */ - bool kvmclock_create_always; -+ -+ /* resizable acpi blob compat */ -+ bool resizable_acpi_blob; - }; - - #define TYPE_PC_MACHINE "generic-pc-machine" --- -2.39.1 - diff --git a/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch b/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch deleted file mode 100644 index e96bb10..0000000 --- a/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary - 
-RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 - -There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'. -Both of them are required to follow cluster-NUMA-node boundary. To -enable the validation to warn about the irregular configuration where -multiple CPUs in one cluster have been associated with different NUMA -nodes. - -Signed-off-by: Gavin Shan -Acked-by: Igor Mammedov -Message-Id: <20230509002739.18388-3-gshan@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb) -Signed-off-by: Gavin Shan ---- - hw/arm/sbsa-ref.c | 2 ++ - hw/arm/virt.c | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c -index 0b93558dde..efb380e7c8 100644 ---- a/hw/arm/sbsa-ref.c -+++ b/hw/arm/sbsa-ref.c -@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) - mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; - mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; - mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - } - - static const TypeInfo sbsa_ref_info = { -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 9be53e9355..df6a0231bc 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - mc->smp_props.clusters_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->auto_enable_numa_with_memdev = true; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - mc->default_ram_id = "mach-virt.ram"; - - object_class_property_add(oc, 
"acpi", "OnOffAuto", --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch b/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch deleted file mode 100644 index 3bbe93f..0000000 --- a/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch +++ /dev/null @@ -1,166 +0,0 @@ -From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Tue, 25 Jul 2023 10:56:51 +0100 -Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas Huth -RH-Acked-by: Peter Xu -RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -The implementation of the SMMUv3 has multiple places where it reads a -data structure from the guest and directly operates on it without -doing a guest-to-host endianness conversion. Since all SMMU data -structures are little-endian, this means that the SMMU doesn't work -on a big-endian host. In particular, this causes the Avocado test - machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max -to fail on an s390x host. - -Add appropriate byte-swapping on reads and writes of guest in-memory -data structures so that the device works correctly on big-endian -hosts. - -As part of this we constrain queue_read() to operate only on Cmd -structs and queue_write() on Evt structs, because in practice these -are the only data structures the two functions are used with, and we -need to know what the data structure is to be able to byte-swap its -parts correctly. 
- -Signed-off-by: Peter Maydell -Tested-by: Thomas Huth -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Auger -Message-id: 20230717132641.764660-1-peter.maydell@linaro.org -Cc: qemu-stable@nongnu.org -(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3) -Signed-off-by: Eric Auger ---- - hw/arm/smmu-common.c | 3 +-- - hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++-------- - 2 files changed, 32 insertions(+), 10 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index e7f1c1f219..daa02ce798 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte, - dma_addr_t addr = baseaddr + index * sizeof(*pte); - - /* TODO: guarantee 64-bit single-copy atomicity */ -- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte), -- MEMTXATTRS_UNSPECIFIED); -+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED); - - if (ret != MEMTX_OK) { - info->type = SMMU_PTW_ERR_WALK_EABT; -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 270c80b665..cfb56725a6 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn) - trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn); - } - --static inline MemTxResult queue_read(SMMUQueue *q, void *data) -+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd) - { - dma_addr_t addr = Q_CONS_ENTRY(q); -+ MemTxResult ret; -+ int i; - -- return dma_memory_read(&address_space_memory, addr, data, q->entry_size, -- MEMTXATTRS_UNSPECIFIED); -+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd), -+ MEMTXATTRS_UNSPECIFIED); -+ if (ret != MEMTX_OK) { -+ return ret; -+ } -+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) { -+ le32_to_cpus(&cmd->word[i]); -+ } -+ return ret; - } - --static MemTxResult queue_write(SMMUQueue *q, void *data) -+static MemTxResult 
queue_write(SMMUQueue *q, Evt *evt_in) - { - dma_addr_t addr = Q_PROD_ENTRY(q); - MemTxResult ret; -+ Evt evt = *evt_in; -+ int i; - -- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size, -+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) { -+ cpu_to_le32s(&evt.word[i]); -+ } -+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt), - MEMTXATTRS_UNSPECIFIED); - if (ret != MEMTX_OK) { - return ret; -@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s) - static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, - SMMUEventInfo *event) - { -- int ret; -+ int ret, i; - - trace_smmuv3_get_ste(addr); - /* TODO: guarantee 64-bit single-copy atomicity */ -@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, - event->u.f_ste_fetch.addr = addr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { -+ le32_to_cpus(&buf->word[i]); -+ } - return 0; - - } -@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, - CD *buf, SMMUEventInfo *event) - { - dma_addr_t addr = STE_CTXPTR(ste); -- int ret; -+ int ret, i; - - trace_smmuv3_get_cd(addr); - /* TODO: guarantee 64-bit single-copy atomicity */ -@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, - event->u.f_ste_fetch.addr = addr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { -+ le32_to_cpus(&buf->word[i]); -+ } - return 0; - } - -@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, - return -EINVAL; - } - if (s->features & SMMU_FEATURE_2LVL_STE) { -- int l1_ste_offset, l2_ste_offset, max_l2_ste, span; -+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i; - dma_addr_t l1ptr, l2ptr; - STEDesc l1std; - -@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, - event->u.f_ste_fetch.addr = l1ptr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) { -+ 
le32_to_cpus(&l1std.word[i]); -+ } - - span = L1STD_SPAN(&l1std); - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch b/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch new file mode 100644 index 0000000..29991d5 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch @@ -0,0 +1,120 @@ +From 41c4083269ec772b406c6c57b496ca2011f928c7 Mon Sep 17 00:00:00 2001 +From: Zhenyu Zhang +Date: Tue, 9 Jul 2024 23:08:59 -0400 +Subject: [PATCH 2/2] hw/arm/virt: Avoid unexpected warning from Linux guest on + host with Fujitsu CPUs + +RH-Author: zhenyzha +RH-MergeRequest: 256: hw/arm/virt: Avoid unexpected warning from Linux guest on host with Fujitsu CPUs +RH-Jira: RHEL-39936 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] fdf156fd05b219a06e2e2ca409fff0f728c1e2cf (zhenyzha/qemu-kvm) + +JIRA: https://issues.redhat.com/browse/RHEL-39936 + +Multiple warning messages and corresponding backtraces are observed when Linux +guest is booted on the host with Fujitsu CPUs. One of them is shown as below. 
+ +[ 0.032443] ------------[ cut here ]------------ +[ 0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than +CTR_EL0.CWG (128 < 256) +[ 0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54 +arch_setup_dma_ops+0xbc/0xcc +[ 0.032470] Modules linked in: +[ 0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64 +[ 0.032481] Hardware name: linux,dummy-virt (DT) +[ 0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 0.032490] pc : arch_setup_dma_ops+0xbc/0xcc +[ 0.032496] lr : arch_setup_dma_ops+0xbc/0xcc +[ 0.032501] sp : ffff80008003b860 +[ 0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c +[ 0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 +[ 0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000 +[ 0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff +[ 0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89 +[ 0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61 +[ 0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4 +[ 0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029 +[ 0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001 +[ 0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40 +[ 0.032564] Call trace: +[ 0.032566] arch_setup_dma_ops+0xbc/0xcc +[ 0.032572] of_dma_configure_id+0x138/0x300 +[ 0.032591] amba_dma_configure+0x34/0xc0 +[ 0.032600] really_probe+0x78/0x3dc +[ 0.032614] __driver_probe_device+0x108/0x160 +[ 0.032619] driver_probe_device+0x44/0x114 +[ 0.032624] __device_attach_driver+0xb8/0x14c +[ 0.032629] bus_for_each_drv+0x88/0xe4 +[ 0.032634] __device_attach+0xb0/0x1e0 +[ 0.032638] device_initial_probe+0x18/0x20 +[ 0.032643] bus_probe_device+0xa8/0xb0 +[ 0.032648] device_add+0x4b4/0x6c0 +[ 0.032652] amba_device_try_add.part.0+0x48/0x360 +[ 0.032657] 
amba_device_add+0x104/0x144 +[ 0.032662] of_amba_device_create.isra.0+0x100/0x1c4 +[ 0.032666] of_platform_bus_create+0x294/0x35c +[ 0.032669] of_platform_populate+0x5c/0x150 +[ 0.032672] of_platform_default_populate_init+0xd0/0xec +[ 0.032697] do_one_initcall+0x4c/0x2e0 +[ 0.032701] do_initcalls+0x100/0x13c +[ 0.032707] kernel_init_freeable+0x1c8/0x21c +[ 0.032712] kernel_init+0x28/0x140 +[ 0.032731] ret_from_fork+0x10/0x20 +[ 0.032735] ---[ end trace 0000000000000000 ]--- + +In Linux, a check is applied to every device which is exposed through +device-tree node. The warning message is raised when the device isn't +DMA coherent and the cache line size is larger than ARCH_DMA_MINALIGN +(128 bytes). The cache line is sorted from CTR_EL0[CWG], which corresponds +to 256 bytes on the guest CPUs. The DMA coherent capability is claimed +through 'dma-coherent' in their device-tree nodes or parent nodes. +This happens even when the device doesn't implement or use DMA at all, +for legacy reasons. + +Fix the issue by adding 'dma-coherent' property to the device-tree root +node, meaning all devices are capable of DMA coherent by default. +This both suppresses the spurious kernel warnings and also guards +against possible future QEMU bugs where we add a DMA-capable device +and forget to mark it as dma-coherent. 
+ +Signed-off-by: Zhenyu Zhang +Reviewed-by: Gavin Shan +Reviewed-by: Donald Dutile +Message-id: 20240612020506.307793-1-zhenyzha@redhat.com +[PMM: tweaked commit message] +Signed-off-by: Peter Maydell +(cherry picked from commit dda533087ad5559674ff486e7031c88dc01e0abd) +Signed-off-by: Zhenyu Zhang +--- + hw/arm/virt.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 3f0496cdb9..6ece67f11d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -330,6 +330,17 @@ static void create_fdt(VirtMachineState *vms) + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2); + qemu_fdt_setprop_string(fdt, "/", "model", "linux,dummy-virt"); + ++ /* ++ * For QEMU, all DMA is coherent. Advertising this in the root node ++ * has two benefits: ++ * ++ * - It avoids potential bugs where we forget to mark a DMA ++ * capable device as being dma-coherent ++ * - It avoids spurious warnings from the Linux kernel about ++ * devices which can't do DMA at all ++ */ ++ qemu_fdt_setprop(fdt, "/", "dma-coherent", NULL, 0); ++ + /* /chosen must exist for load_dtb to fill in necessary properties later */ + qemu_fdt_add_subnode(fdt, "/chosen"); + if (vms->dtb_randomness) { +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch b/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch new file mode 100644 index 0000000..8128a4e --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch @@ -0,0 +1,59 @@ +From e3360c415f7de923d27c3167260a93cb679afabe Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 6 May 2024 15:09:43 +0200 +Subject: [PATCH 1/2] hw/arm/virt: Fix spurious call to arm_virt_compat_set() + +RH-Author: Eric Auger +RH-MergeRequest: 238: hw/arm/virt: Fix spurious call to arm_virt_compat_set() +RH-Jira: RHEL-34945 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Gavin Shan +RH-Commit: [1/1] 
a858a3e1dff12b28e14f7e4bd2b896a9f06eacbb (eauger1/centos-qemu-kvm) + +JIRA: https://issues.redhat.com/browse/RHEL-34945 +Status: RHEL-only + +Downstream, we apply arm_rhel_compat in place of arm_virt_compat. +This is done through arm_rhel_compat_set() transparently called in +DEFINE_RHEL_MACHINE_LATEST(). So there is no need to call +arm_virt_compat_set() in rhel_machine_class_init(). Besides +this triggers a "GLib: g_ptr_array_add: assertion 'rarray' failed" +warning. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f1af9495c6..3f0496cdb9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -85,6 +85,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static GlobalProperty arm_virt_compat[] = { + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" }, + }; +@@ -101,7 +102,6 @@ static void arm_virt_compat_set(MachineClass *mc) + arm_virt_compat_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -3536,7 +3536,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); +- arm_virt_compat_set(mc); + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch b/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch deleted file mode 100644 index 42ec705..0000000 --- a/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 3/6] 
hw/arm/virt: Validate cluster and NUMA node boundary for - RHEL machines - -RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 -Upstream Status: RHEL only - -Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of -CPU cluster and NUMA node will be validated for 'virt-rhel*' machines. -A warning message will be printed if the boundary is broken. - -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index df6a0231bc..faf68488d5 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - mc->smp_props.clusters_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->auto_enable_numa_with_memdev = true; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - mc->default_ram_id = "mach-virt.ram"; - - object_class_property_add(oc, "acpi", "OnOffAuto", --- -2.39.3 - diff --git a/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch b/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch new file mode 100644 index 0000000..ee2f88e --- /dev/null +++ b/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch @@ -0,0 +1,73 @@ +From e74980be81d641736ea9d44d0fe9af02af63a220 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:40 -0500 +Subject: [PATCH 083/100] hw/i386: Add support for loading BIOS using + guest_memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [83/91] 
7b77d212ef7d83b66ad9d8348179ee84e64fb911 (bonzini/rhel-qemu-kvm) + +When guest_memfd is enabled, the BIOS is generally part of the initial +encrypted guest image and will be accessed as private guest memory. Add +the necessary changes to set up the associated RAM region with a +guest_memfd backend to allow for this. + +Current support centers around using -bios to load the BIOS data. +Support for loading the BIOS via pflash requires additional enablement +since those interfaces rely on the use of ROM memory regions which make +use of the KVM_MEM_READONLY memslot flag, which is not supported for +guest_memfd-backed memslots. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit fc7a69e177e4ba26d11fcf47b853f85115b35a11) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86-common.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +index 35fe6eabea..6cbb76c25c 100644 +--- a/hw/i386/x86-common.c ++++ b/hw/i386/x86-common.c +@@ -969,8 +969,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + (bios_size % 65536) != 0) { + goto bios_error; + } +- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, +- &error_fatal); ++ if (machine_require_guest_memfd(MACHINE(x86ms))) { ++ memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", ++ bios_size, &error_fatal); ++ } else { ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", ++ bios_size, &error_fatal); ++ } + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for +@@ -991,9 +996,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + } + g_free(filename); + +- /* map the last 128KB of the BIOS in ISA space */ +- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, +- !isapc_ram_fw); ++ if 
(!machine_require_guest_memfd(MACHINE(x86ms))) { ++ /* map the last 128KB of the BIOS in ISA space */ ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); ++ } + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch b/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch new file mode 100644 index 0000000..1fafe03 --- /dev/null +++ b/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch @@ -0,0 +1,106 @@ +From c1e615d6b8f609b72a94ffe6d31a9848a41744ef Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Tue, 30 Apr 2024 17:06:39 +0200 +Subject: [PATCH 038/100] hw/i386: Have x86_bios_rom_init() take + X86MachineState rather than MachineState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [38/91] 59f388b1dffc5d0aa2f0fff768194d755bc3efbb (bonzini/rhel-qemu-kvm) + +The function creates and leaks two MemoryRegion objects regarding the BIOS which +will be moved into X86MachineState in the next steps to avoid the leakage. 
+ +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240430150643.111976-3-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 848351840148f8c3b53ddf6210194506547d3ffd) +Signed-off-by: Paolo Bonzini +--- + hw/i386/microvm.c | 2 +- + hw/i386/pc_sysfw.c | 4 ++-- + hw/i386/x86.c | 4 ++-- + include/hw/i386/x86.h | 2 +- + 4 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c +index 61a772dfe6..fec63cacfa 100644 +--- a/hw/i386/microvm.c ++++ b/hw/i386/microvm.c +@@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) + default_firmware = x86_machine_is_acpi_enabled(x86ms) + ? MICROVM_BIOS_FILENAME + : MICROVM_QBOOT_FILENAME; +- x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true); ++ x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true); + } + + static void microvm_memory_init(MicrovmMachineState *mms) +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 3efabbbab2..ef7dea9798 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -206,7 +206,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; + + if (!pcmc->pci_enabled) { +- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true); ++ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); + return; + } + +@@ -227,7 +227,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + + if (!pflash_blk[0]) { + /* Machine property pflash0 not set, use ROM mode */ +- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false); ++ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); + } else { + if (kvm_enabled() && !kvm_readonly_mem_enabled()) { + /* +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 2a4f3ee285..6d3c72f124 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1128,7 +1128,7 @@ void x86_load_linux(X86MachineState *x86ms, + 
nb_option_roms++; + } + +-void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) + { + const char *bios_name; +@@ -1138,7 +1138,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + ssize_t ret; + + /* BIOS load */ +- bios_name = ms->firmware ?: default_firmware; ++ bios_name = MACHINE(x86ms)->firmware ?: default_firmware; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = get_image_size(filename); +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index 4dc30dcb4d..cb07618d19 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -116,7 +116,7 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + +-void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw); + + void x86_load_linux(X86MachineState *x86ms, +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch b/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch new file mode 100644 index 0000000..a789fb7 --- /dev/null +++ b/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch @@ -0,0 +1,51 @@ +From 7bb1f124413891bc5d2187f12cd19da6e794904b Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 3 Apr 2024 10:59:53 -0400 +Subject: [PATCH 010/100] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is + not disabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [10/91] 62110e4bf52cb3e106c8d2a902bbd31548beba00 (bonzini/rhel-qemu-kvm) + +A value 1 of 
PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system +also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC +is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to +be cleared. The PIC probe should then print: + + [ 0.155970] Using NULL legacy PIC + +However, no such log printed in guest kernel unless PCAT_COMPAT is +cleared. + +Signed-off-by: Xiaoyao Li +Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 292dd287e78e0cbafde9d1522c729349d132d844) +Signed-off-by: Paolo Bonzini +--- + hw/i386/acpi-common.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c +index 20f19269da..0cc2919bb8 100644 +--- a/hw/i386/acpi-common.c ++++ b/hw/i386/acpi-common.c +@@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, + acpi_table_begin(&table, table_data); + /* Local APIC Address */ + build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); +- build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ ++ /* Flags. bit 0: PCAT_COMPAT */ ++ build_append_int_noprefix(table_data, ++ x86ms->pic != ON_OFF_AUTO_OFF ? 
1 : 0 , 4); + + for (i = 0; i < apic_ids->len; i++) { + pc_madt_cpu_entry(i, apic_ids, table_data, false); +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch b/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch new file mode 100644 index 0000000..021db3d --- /dev/null +++ b/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch @@ -0,0 +1,164 @@ +From fd6de3c5e97bdf13a39342fc71815a20c66867ae Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:07 +0200 +Subject: [PATCH 043/100] hw/i386/pc_sysfw: Alias rather than copy isa-bios + region + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [43/91] f64dab2a091838a10a9b94e3d09ea11432b0809f (bonzini/rhel-qemu-kvm) + +In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped +to the top of the 4G memory boundary. Do the same in the -pflash case, but only +for new machine versions for migration compatibility. This establishes common +behavior and makes pflash commands work in the "isa-bios" region which some +real-world legacy bioses rely on. + +Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash +case will now also point to encrypted memory, just like it already does in the +-bios case. 
+ +When running `info mtree` before and after this commit with +`qemu-system-x86_64 -S -drive \ +if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running +`diff -u before.mtree after.mtree` results in the following changes in the +memory tree: + +| --- before.mtree +| +++ after.mtree +| @@ -71,7 +71,7 @@ +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff +| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff +| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff +| @@ -108,7 +108,7 @@ +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff +| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff +| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff +| @@ -131,11 +131,14 @@ +| memory-region: pc.ram +| 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram +| +| +memory-region: system.flash0 +| + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 +| + +| memory-region: pci +| 0000000000000000-ffffffffffffffff (prio 
-1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| +| memory-region: smram +| 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff + +Note that in both cases the "system" memory region contains the entry + + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 + +but the "system.flash0" memory region only appears standalone when "isa-bios" is +an alias. + +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-7-shentey@gmail.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a44ea3fa7f2aa1d809fdca1b84a52695b53d8ad0) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 1 + + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + hw/i386/pc_sysfw.c | 8 +++++++- + include/hw/i386/pc.h | 1 + + 5 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 1a34bc4522..660a59c63b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1967,6 +1967,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->has_reserved_memory = true; + pcmc->enforce_aligned_dimm = true; + pcmc->enforce_amd_1tb_hole = true; ++ pcmc->isa_bios_alias = true; + /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported + * to be used at the moment, 32K should be enough for a while. 
*/ + pcmc->acpi_data_size = 0x20000 + 0x8000; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index bef3e8b73e..dbb7f2ed17 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -975,6 +975,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->alias = "pc"; + m->is_default = 1; + m->smp_props.prefer_sockets = true; ++ pcmc->isa_bios_alias = false; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index dedc86eec9..f9900ad798 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -735,6 +735,7 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; ++ pcmc->isa_bios_alias = false; + + compat_props_add(m->compat_props, pc_rhel_9_5_compat, + pc_rhel_9_5_compat_len); +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 82d37cb376..ac88ad4eb9 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -135,6 +135,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + MemoryRegion *rom_memory) + { + X86MachineState *x86ms = X86_MACHINE(pcms); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + hwaddr total_size = 0; + int i; + BlockBackend *blk; +@@ -184,7 +185,12 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ if (pcmc->isa_bios_alias) { ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, ++ true); ++ } else { ++ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ } + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 467e7fb52f..3f53ec73ac 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -122,6 +122,7 @@ struct PCMachineClass { + bool enforce_aligned_dimm; + bool 
broken_reserved_end; + bool enforce_amd_1tb_hole; ++ bool isa_bios_alias; + + /* generate legacy CPU hotplug AML */ + bool legacy_cpu_hotplug; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch b/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch new file mode 100644 index 0000000..4188fd3 --- /dev/null +++ b/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch @@ -0,0 +1,53 @@ +From 9bf1d368c4b53139db39649833d475e097fc98d1 Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Mon, 22 Apr 2024 22:06:22 +0200 +Subject: [PATCH 039/100] hw/i386/pc_sysfw: Remove unused parameter from + pc_isa_bios_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [39/91] c0019dc2706a8e3f40486fd4a4c0dd1fbe23237b (bonzini/rhel-qemu-kvm) + +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240422200625.2768-2-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit f4b63768b91811cdcf1fb7b270587123251dfea5) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index ef7dea9798..59c7a81692 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -41,8 +41,7 @@ + #define FLASH_SECTOR_SIZE 4096 + + static void pc_isa_bios_init(MemoryRegion *rom_memory, +- MemoryRegion *flash_mem, +- int ram_size) ++ MemoryRegion *flash_mem) + { + int isa_bios_size; + MemoryRegion *isa_bios; +@@ -186,7 +185,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(rom_memory, flash_mem, size); ++ 
pc_isa_bios_init(rom_memory, flash_mem); + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch b/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch new file mode 100644 index 0000000..a543c79 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch @@ -0,0 +1,158 @@ +From e6472ff46cbed97c2a238a8ef7d321351931333a Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:30 -0500 +Subject: [PATCH 070/100] hw/i386/sev: Add function to get SEV metadata from + OVMF header + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [70/91] ba818dade96119c8a51ca1fb222f4f69e2752396 (bonzini/rhel-qemu-kvm) + +A recent version of OVMF expanded the reset vector GUID list to add +SEV-specific metadata GUID. The SEV metadata describes the reserved +memory regions such as the secrets and CPUID page used during the SEV-SNP +guest launch. + +The pc_system_get_ovmf_sev_metadata_ptr() is used to retrieve the SEV +metadata pointer from the OVMF GUID list. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit f3c30c575d34122573b7370a7da5ca3a27dde481) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 4 ++++ + include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++ + target/i386/sev-sysemu-stub.c | 4 ++++ + target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++ + target/i386/sev.h | 2 ++ + 5 files changed, 68 insertions(+) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index ac88ad4eb9..9b8671c441 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -260,6 +260,10 @@ void x86_firmware_configure(void *ptr, int size) + pc_system_parse_ovmf_flash(ptr, size); + + if (sev_enabled()) { ++ ++ /* Copy the SEV metadata table (if it exists) */ ++ pc_system_parse_sev_metadata(ptr, size); ++ + ret = sev_es_save_reset_vector(ptr, size); + if (ret) { + error_report("failed to locate and/or save reset vector"); +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 3f53ec73ac..94b49310f5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -167,6 +167,32 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" + #define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + ++typedef enum { ++ SEV_DESC_TYPE_UNDEF, ++ /* The section contains the region that must be validated by the VMM. 
*/ ++ SEV_DESC_TYPE_SNP_SEC_MEM, ++ /* The section contains the SNP secrets page */ ++ SEV_DESC_TYPE_SNP_SECRETS, ++ /* The section contains address that can be used as a CPUID page */ ++ SEV_DESC_TYPE_CPUID, ++ ++} ovmf_sev_metadata_desc_type; ++ ++typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc { ++ uint32_t base; ++ uint32_t len; ++ ovmf_sev_metadata_desc_type type; ++} OvmfSevMetadataDesc; ++ ++typedef struct __attribute__((__packed__)) OvmfSevMetadata { ++ uint8_t signature[4]; ++ uint32_t len; ++ uint32_t version; ++ uint32_t num_desc; ++ OvmfSevMetadataDesc descs[]; ++} OvmfSevMetadata; ++ ++OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void); + + void pc_pci_as_mapping_init(MemoryRegion *system_memory, + MemoryRegion *pci_address_space); +diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c +index 96e1c15cc3..fc1c57c411 100644 +--- a/target/i386/sev-sysemu-stub.c ++++ b/target/i386/sev-sysemu-stub.c +@@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) + { + monitor_printf(mon, "SEV is not available in this QEMU\n"); + } ++ ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) ++{ ++} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e84e4395a5..17281bb2c7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -597,6 +597,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp) + return sev_get_capabilities(errp); + } + ++static OvmfSevMetadata *ovmf_sev_metadata_table; ++ ++#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc" ++typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset { ++ uint32_t offset; ++} OvmfSevMetadataOffset; ++ ++OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void) ++{ ++ return ovmf_sev_metadata_table; ++} ++ ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) ++{ ++ OvmfSevMetadata *metadata; ++ OvmfSevMetadataOffset *data; ++ ++ if 
(!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data, ++ NULL)) { ++ return; ++ } ++ ++ metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset); ++ if (memcmp(metadata->signature, "ASEV", 4) != 0 || ++ metadata->len < sizeof(OvmfSevMetadata) || ++ metadata->len > flash_size - data->offset) { ++ return; ++ } ++ ++ ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len); ++} ++ + static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + Error **errp) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 5dc4767b1e..cc12824dd6 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); + ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch b/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch new file mode 100644 index 0000000..c5a7a28 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch @@ -0,0 +1,165 @@ +From 226cf6c3d3e2fd1a35422043dbe0b73d1216df83 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:36 -0500 +Subject: [PATCH 073/100] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP + is enabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [73/91] 844afd322c12c3e8992cf6ec692c94e70747bd0c (bonzini/rhel-qemu-kvm) + +As with SEV, an SNP guest requires that the BIOS be part of the initial +encrypted/measured guest payload. Extend sev_encrypt_flash() to handle +the SNP case and plumb through the GPA of the BIOS location since this +is needed for SNP. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 12 +++++++----- + hw/i386/x86-common.c | 2 +- + include/hw/i386/x86.h | 2 +- + target/i386/sev-sysemu-stub.c | 2 +- + target/i386/sev.c | 5 +++-- + target/i386/sev.h | 2 +- + 6 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 9b8671c441..7cdbafc8d2 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -148,6 +148,8 @@ static void pc_system_flash_map(PCMachineState *pcms, + assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled); + + for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { ++ hwaddr gpa; ++ + system_flash = pcms->flash[i]; + blk = pflash_cfi01_get_blk(system_flash); + if (!blk) { +@@ -177,11 +179,11 @@ static void pc_system_flash_map(PCMachineState *pcms, + } + + total_size += size; ++ gpa = 0x100000000ULL - total_size; /* where the flash is mapped */ + qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks", + size / FLASH_SECTOR_SIZE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal); +- sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, +- 0x100000000ULL - total_size); ++ sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa); + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +@@ -196,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + if (sev_enabled()) { + flash_ptr = memory_region_get_ram_ptr(flash_mem); + flash_size = memory_region_size(flash_mem); +- x86_firmware_configure(flash_ptr, flash_size); ++ x86_firmware_configure(gpa, flash_ptr, flash_size); + } + } + } +@@ -249,7 +251,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + pc_system_flash_cleanup_unused(pcms); + } + +-void x86_firmware_configure(void *ptr, int size) 
++void x86_firmware_configure(hwaddr gpa, void *ptr, int size) + { + int ret; + +@@ -270,6 +272,6 @@ void x86_firmware_configure(void *ptr, int size) + exit(1); + } + +- sev_encrypt_flash(ptr, size, &error_fatal); ++ sev_encrypt_flash(gpa, ptr, size, &error_fatal); + } + } +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +index 67b03c913a..35fe6eabea 100644 +--- a/hw/i386/x86-common.c ++++ b/hw/i386/x86-common.c +@@ -981,7 +981,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + */ + void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); +- x86_firmware_configure(ptr, bios_size); ++ x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size); + } else { + memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index b006f16b8d..d43cb3908e 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent); + DeviceState *ioapic_init_secondary(GSIState *gsi_state); + + /* pc_sysfw.c */ +-void x86_firmware_configure(void *ptr, int size); ++void x86_firmware_configure(hwaddr gpa, void *ptr, int size); + + #endif +diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c +index fc1c57c411..d5bf886e79 100644 +--- a/target/i386/sev-sysemu-stub.c ++++ b/target/i386/sev-sysemu-stub.c +@@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, + error_setg(errp, "SEV is not available in this QEMU"); + } + +-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) ++int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + g_assert_not_reached(); + } +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 06401f0526..7b5c4b4874 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1484,7 
+1484,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + int +-sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) ++sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + +@@ -1841,7 +1841,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { + ret = false; + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index cc12824dd6..858005a119 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -59,7 +59,7 @@ uint32_t sev_get_cbit_position(void); + uint32_t sev_get_reduced_phys_bits(void); + bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); + +-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); ++int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); + int sev_inject_launch_secret(const char *hdr, const char *secret, + uint64_t gpa, Error **errp); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch b/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch new file mode 100644 index 0000000..050a522 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch @@ -0,0 +1,123 @@ +From a20b2e3e52b9589ac1abc8b9b818d526c86368cf Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:39 -0500 +Subject: [PATCH 082/100] hw/i386/sev: Use guest_memfd for legacy ROMs + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: 
[82/91] a591e85e00c353009803b143c80852b8c9b1f15e (bonzini/rhel-qemu-kvm) + +Current SNP guest kernels will attempt to access these regions with +with C-bit set, so guest_memfd is needed to handle that. Otherwise, +kvm_convert_memory() will fail when the guest kernel tries to access it +and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges +to private. + +Whether guests should actually try to access ROM regions in this way (or +need to deal with legacy ROM regions at all), is a separate issue to be +addressed on kernel side, but current SNP guest kernels will exhibit +this behavior and so this handling is needed to allow QEMU to continue +running existing SNP guest kernels. + +Signed-off-by: Michael Roth +[pankaj: Added sev_snp_enabled() check] +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 413a67450750e0459efeffc3db3ba9759c3e381c) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 14 ++++++++++---- + hw/i386/pc_sysfw.c | 19 +++++++++++++------ + 2 files changed, 23 insertions(+), 10 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 0aca0cc79e..b25d075b59 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -62,6 +62,7 @@ + #include "hw/mem/memory-device.h" + #include "e820_memory_layout.h" + #include "trace.h" ++#include "sev.h" + #include CONFIG_DEVICES + + #ifdef CONFIG_XEN_EMU +@@ -1173,10 +1174,15 @@ void pc_memory_init(PCMachineState *pcms, + pc_system_firmware_init(pcms, rom_memory); + + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); +- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, +- &error_fatal); +- if (pcmc->pci_enabled) { +- memory_region_set_readonly(option_rom_mr, true); ++ if (machine_require_guest_memfd(machine)) { ++ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", ++ PC_ROM_SIZE, &error_fatal); ++ } else { ++ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, ++ 
&error_fatal); ++ if (pcmc->pci_enabled) { ++ memory_region_set_readonly(option_rom_mr, true); ++ } + } + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 7cdbafc8d2..ef80281d28 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -40,8 +40,8 @@ + + #define FLASH_SECTOR_SIZE 4096 + +-static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, +- MemoryRegion *flash_mem) ++static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios, ++ MemoryRegion *rom_memory, MemoryRegion *flash_mem) + { + int isa_bios_size; + uint64_t flash_size; +@@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * KiB); +- memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, +- &error_fatal); ++ if (machine_require_guest_memfd(MACHINE(pcms))) { ++ memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios", ++ isa_bios_size, &error_fatal); ++ } else { ++ memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, ++ &error_fatal); ++ } + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, + isa_bios, +@@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), + isa_bios_size); + +- memory_region_set_readonly(isa_bios, true); ++ if (!machine_require_guest_memfd(current_machine)) { ++ memory_region_set_readonly(isa_bios, true); ++ } + } + + static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms, +@@ -191,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, + true); + } else { +- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem); + } + + /* Encrypt the 
pflash boot ROM */ +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch b/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch new file mode 100644 index 0000000..7b03cb4 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch @@ -0,0 +1,58 @@ +From 4331180aa09e44550ff8de781c618bae5e99bb70 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 9 Apr 2024 18:07:43 -0500 +Subject: [PATCH 025/100] hw/i386/sev: Use legacy SEV VM types for older + machine types + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [25/91] 8c73cd312736ccb0818b4d3216fd13712f21f3c9 (bonzini/rhel-qemu-kvm) + +Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for +creating SEV/SEV-ES going forward. However, this API results in guest +measurement changes which are generally not expected for users of these +older guest types and can cause disruption if they switch to a newer +QEMU/kernel version. Avoid this by continuing to use the older +KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types. 
+ +Signed-off-by: Michael Roth +Message-ID: <20240409230743.962513-4-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ea7fbd37537b3a598335c21ccb2ea674630fc810) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 1 + + target/i386/sev.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b9fde3cec1..1a34bc4522 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -351,6 +351,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + GlobalProperty pc_rhel_9_5_compat[] = { + /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ + { TYPE_X86_CPU, "guest-phys-bits", "0" }, ++ { "sev-guest", "legacy-vm-type", "true" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index f4ee317cb0..d30b68c11e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1417,6 +1417,7 @@ sev_guest_instance_init(Object *obj) + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); ++ object_apply_compat_props(obj); + } + + /* sev guest info */ +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch b/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch new file mode 100644 index 0000000..40ca52b --- /dev/null +++ b/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch @@ -0,0 +1,2301 @@ +From bf2206fae2e640da9de7fc0648b4b90ad3ddfbe3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:41 +0200 +Subject: [PATCH 046/100] hw/i386: split x86.c in multiple parts + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [46/91] 3d6e8364aa9b691c25bdcf54a30b116da5d33874 (bonzini/rhel-qemu-kvm) + +Keep the basic X86MachineState definition in x86.c. 
Move out functions that +are only needed by other files: x86-common.c for the pc and microvm machines, +x86-cpu.c for those used by accelerator code. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-11-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b061f0598b9231f7992aff4fcdf3f336f9747d11) +Signed-off-by: Paolo Bonzini +--- + hw/i386/meson.build | 4 +- + hw/i386/x86-common.c | 1007 +++++++++++++++++++++++++++++++++++++++ + hw/i386/x86-cpu.c | 97 ++++ + hw/i386/x86.c | 1052 +---------------------------------------- + include/hw/i386/x86.h | 6 +- + 5 files changed, 1113 insertions(+), 1053 deletions(-) + create mode 100644 hw/i386/x86-common.c + create mode 100644 hw/i386/x86-cpu.c + +diff --git a/hw/i386/meson.build b/hw/i386/meson.build +index d9da676038..3437da0aad 100644 +--- a/hw/i386/meson.build ++++ b/hw/i386/meson.build +@@ -4,6 +4,7 @@ i386_ss.add(files( + 'e820_memory_layout.c', + 'multiboot.c', + 'x86.c', ++ 'x86-cpu.c', + )) + + i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c')) +@@ -12,7 +13,7 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), + i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), + if_false: files('amd_iommu-stub.c')) + i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) +-i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) ++i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) + i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) + i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) + i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) +@@ -22,6 +23,7 @@ i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), + + i386_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-common.c')) + i386_ss.add(when: 'CONFIG_PC', if_true: files( ++ 'x86-common.c', + 'pc.c', 
+ 'pc_sysfw.c', + 'acpi-build.c', +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +new file mode 100644 +index 0000000000..67b03c913a +--- /dev/null ++++ b/hw/i386/x86-common.c +@@ -0,0 +1,1007 @@ ++/* ++ * Copyright (c) 2003-2004 Fabrice Bellard ++ * Copyright (c) 2019, 2024 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "qemu/osdep.h" ++#include "qemu/error-report.h" ++#include "qemu/cutils.h" ++#include "qemu/units.h" ++#include "qemu/datadir.h" ++#include "qapi/error.h" ++#include "sysemu/numa.h" ++#include "sysemu/sysemu.h" ++#include "sysemu/xen.h" ++#include "trace.h" ++ ++#include "hw/i386/x86.h" ++#include "target/i386/cpu.h" ++#include "hw/rtc/mc146818rtc.h" ++#include "target/i386/sev.h" ++ ++#include "hw/acpi/cpu_hotplug.h" ++#include "hw/irq.h" ++#include "hw/loader.h" ++#include "multiboot.h" ++#include "elf.h" ++#include "standard-headers/asm-x86/bootparam.h" ++#include CONFIG_DEVICES ++#include "kvm/kvm_i386.h" ++ ++#ifdef CONFIG_XEN_EMU ++#include "hw/xen/xen.h" ++#include "hw/i386/kvm/xen_evtchn.h" ++#endif ++ ++/* Physical Address of PVH entry point read from kernel ELF NOTE */ ++static size_t pvh_start_addr; ++ ++static void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) ++{ ++ Object *cpu = object_new(MACHINE(x86ms)->cpu_type); ++ ++ if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { ++ goto out; ++ } ++ qdev_realize(DEVICE(cpu), NULL, errp); ++ ++out: ++ object_unref(cpu); ++} ++ ++void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) ++{ ++ int i; ++ const CPUArchIdList *possible_cpus; ++ MachineState *ms = MACHINE(x86ms); ++ MachineClass *mc = MACHINE_GET_CLASS(x86ms); ++ ++ x86_cpu_set_default_version(default_cpu_version); ++ ++ /* ++ * Calculates the limit to CPU APIC ID values ++ * ++ * Limit for the APIC ID value, so that all ++ * CPU APIC IDs are < x86ms->apic_id_limit. ++ * ++ * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). ++ */ ++ x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, ++ ms->smp.max_cpus - 1) + 1; ++ ++ /* ++ * Can we support APIC ID 255 or higher? With KVM, that requires ++ * both in-kernel lapic and X2APIC userspace API. 
++ * ++ * kvm_enabled() must go first to ensure that kvm_* references are ++ * not emitted for the linker to consume (kvm_enabled() is ++ * a literal `0` in configurations where kvm_* aren't defined) ++ */ ++ if (kvm_enabled() && x86ms->apic_id_limit > 255 && ++ kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { ++ error_report("current -smp configuration requires kernel " ++ "irqchip and X2APIC API support."); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (kvm_enabled()) { ++ kvm_set_max_apic_id(x86ms->apic_id_limit); ++ } ++ ++ if (!kvm_irqchip_in_kernel()) { ++ apic_set_max_apic_id(x86ms->apic_id_limit); ++ } ++ ++ possible_cpus = mc->possible_cpu_arch_ids(ms); ++ for (i = 0; i < ms->smp.cpus; i++) { ++ x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); ++ } ++} ++ ++void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) ++{ ++ MC146818RtcState *rtc = MC146818_RTC(s); ++ ++ if (cpus_count > 0xff) { ++ /* ++ * If the number of CPUs can't be represented in 8 bits, the ++ * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just ++ * to make old BIOSes fail more predictably. ++ */ ++ mc146818rtc_set_cmos_data(rtc, 0x5f, 0); ++ } else { ++ mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); ++ } ++} ++ ++static int x86_apic_cmp(const void *a, const void *b) ++{ ++ CPUArchId *apic_a = (CPUArchId *)a; ++ CPUArchId *apic_b = (CPUArchId *)b; ++ ++ return apic_a->arch_id - apic_b->arch_id; ++} ++ ++/* ++ * returns pointer to CPUArchId descriptor that matches CPU's apic_id ++ * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no ++ * entry corresponding to CPU's apic_id returns NULL. 
++ */ ++static CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) ++{ ++ CPUArchId apic_id, *found_cpu; ++ ++ apic_id.arch_id = id; ++ found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, ++ ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), ++ x86_apic_cmp); ++ if (found_cpu && idx) { ++ *idx = found_cpu - ms->possible_cpus->cpus; ++ } ++ return found_cpu; ++} ++ ++void x86_cpu_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *found_cpu; ++ Error *local_err = NULL; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ if (x86ms->acpi_dev) { ++ hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ /* increment the number of CPUs */ ++ x86ms->boot_cpus++; ++ if (x86ms->rtc) { ++ x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); ++ } ++ if (x86ms->fw_cfg) { ++ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); ++ } ++ ++ found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); ++ found_cpu->cpu = CPU(dev); ++out: ++ error_propagate(errp, local_err); ++} ++ ++void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ int idx = -1; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ if (!x86ms->acpi_dev) { ++ error_setg(errp, "CPU hot unplug not supported without ACPI"); ++ return; ++ } ++ ++ x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); ++ assert(idx != -1); ++ if (idx == 0) { ++ error_setg(errp, "Boot CPU is unpluggable"); ++ return; ++ } ++ ++ hotplug_handler_unplug_request(x86ms->acpi_dev, dev, ++ errp); ++} ++ ++void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *found_cpu; ++ Error *local_err = NULL; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ hotplug_handler_unplug(x86ms->acpi_dev, dev, 
&local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); ++ found_cpu->cpu = NULL; ++ qdev_unrealize(dev); ++ ++ /* decrement the number of CPUs */ ++ x86ms->boot_cpus--; ++ /* Update the number of CPUs in CMOS */ ++ x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); ++ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); ++ out: ++ error_propagate(errp, local_err); ++} ++ ++void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ int idx; ++ CPUState *cs; ++ CPUArchId *cpu_slot; ++ X86CPUTopoIDs topo_ids; ++ X86CPU *cpu = X86_CPU(dev); ++ CPUX86State *env = &cpu->env; ++ MachineState *ms = MACHINE(hotplug_dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ unsigned int smp_cores = ms->smp.cores; ++ unsigned int smp_threads = ms->smp.threads; ++ X86CPUTopoInfo topo_info; ++ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if (x86ms->acpi_dev) { ++ Error *local_err = NULL; ++ ++ hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, ++ &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } ++ ++ init_topo_info(&topo_info, x86ms); ++ ++ env->nr_dies = ms->smp.dies; ++ ++ /* ++ * If APIC ID is not set, ++ * set it based on socket/die/core/thread properties. ++ */ ++ if (cpu->apic_id == UNASSIGNED_APIC_ID) { ++ int max_socket = (ms->smp.max_cpus - 1) / ++ smp_threads / smp_cores / ms->smp.dies; ++ ++ /* ++ * die-id was optional in QEMU 4.0 and older, so keep it optional ++ * if there's only one die per socket. 
++ */ ++ if (cpu->die_id < 0 && ms->smp.dies == 1) { ++ cpu->die_id = 0; ++ } ++ ++ if (cpu->socket_id < 0) { ++ error_setg(errp, "CPU socket-id is not set"); ++ return; ++ } else if (cpu->socket_id > max_socket) { ++ error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", ++ cpu->socket_id, max_socket); ++ return; ++ } ++ if (cpu->die_id < 0) { ++ error_setg(errp, "CPU die-id is not set"); ++ return; ++ } else if (cpu->die_id > ms->smp.dies - 1) { ++ error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", ++ cpu->die_id, ms->smp.dies - 1); ++ return; ++ } ++ if (cpu->core_id < 0) { ++ error_setg(errp, "CPU core-id is not set"); ++ return; ++ } else if (cpu->core_id > (smp_cores - 1)) { ++ error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", ++ cpu->core_id, smp_cores - 1); ++ return; ++ } ++ if (cpu->thread_id < 0) { ++ error_setg(errp, "CPU thread-id is not set"); ++ return; ++ } else if (cpu->thread_id > (smp_threads - 1)) { ++ error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", ++ cpu->thread_id, smp_threads - 1); ++ return; ++ } ++ ++ topo_ids.pkg_id = cpu->socket_id; ++ topo_ids.die_id = cpu->die_id; ++ topo_ids.core_id = cpu->core_id; ++ topo_ids.smt_id = cpu->thread_id; ++ cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); ++ } ++ ++ cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); ++ if (!cpu_slot) { ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ error_setg(errp, ++ "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" ++ " APIC ID %" PRIu32 ", valid index range 0:%d", ++ topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, ++ cpu->apic_id, ms->possible_cpus->len - 1); ++ return; ++ } ++ ++ if (cpu_slot->cpu) { ++ error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", ++ idx, cpu->apic_id); ++ return; ++ } ++ ++ /* if 'address' properties socket-id/core-id/thread-id are not set, set them ++ * so that 
machine_query_hotpluggable_cpus would show correct values ++ */ ++ /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() ++ * once -smp refactoring is complete and there will be CPU private ++ * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { ++ error_setg(errp, "property socket-id: %u doesn't match set apic-id:" ++ " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, ++ topo_ids.pkg_id); ++ return; ++ } ++ cpu->socket_id = topo_ids.pkg_id; ++ ++ if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { ++ error_setg(errp, "property die-id: %u doesn't match set apic-id:" ++ " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); ++ return; ++ } ++ cpu->die_id = topo_ids.die_id; ++ ++ if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { ++ error_setg(errp, "property core-id: %u doesn't match set apic-id:" ++ " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, ++ topo_ids.core_id); ++ return; ++ } ++ cpu->core_id = topo_ids.core_id; ++ ++ if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { ++ error_setg(errp, "property thread-id: %u doesn't match set apic-id:" ++ " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, ++ topo_ids.smt_id); ++ return; ++ } ++ cpu->thread_id = topo_ids.smt_id; ++ ++ /* ++ * kvm_enabled() must go first to ensure that kvm_* references are ++ * not emitted for the linker to consume (kvm_enabled() is ++ * a literal `0` in configurations where kvm_* aren't defined) ++ */ ++ if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && ++ !kvm_hv_vpindex_settable()) { ++ error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); ++ return; ++ } ++ ++ cs = CPU(cpu); ++ cs->cpu_index = idx; ++ ++ numa_cpu_pre_plug(cpu_slot, dev, errp); ++} ++ ++static long get_file_size(FILE *f) ++{ ++ long where, size; ++ ++ /* XXX: on 
Unix systems, using fstat() probably makes more sense */ ++ ++ where = ftell(f); ++ fseek(f, 0, SEEK_END); ++ size = ftell(f); ++ fseek(f, where, SEEK_SET); ++ ++ return size; ++} ++ ++void gsi_handler(void *opaque, int n, int level) ++{ ++ GSIState *s = opaque; ++ ++ trace_x86_gsi_interrupt(n, level); ++ switch (n) { ++ case 0 ... ISA_NUM_IRQS - 1: ++ if (s->i8259_irq[n]) { ++ /* Under KVM, Kernel will forward to both PIC and IOAPIC */ ++ qemu_set_irq(s->i8259_irq[n], level); ++ } ++ /* fall through */ ++ case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: ++#ifdef CONFIG_XEN_EMU ++ /* ++ * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC ++ * routing actually works properly under Xen). And then to ++ * *either* the PIRQ handling or the I/OAPIC depending on ++ * whether the former wants it. ++ */ ++ if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { ++ break; ++ } ++#endif ++ qemu_set_irq(s->ioapic_irq[n], level); ++ break; ++ case IO_APIC_SECONDARY_IRQBASE ++ ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: ++ qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); ++ break; ++ } ++} ++ ++void ioapic_init_gsi(GSIState *gsi_state, Object *parent) ++{ ++ DeviceState *dev; ++ SysBusDevice *d; ++ unsigned int i; ++ ++ assert(parent); ++ if (kvm_ioapic_in_kernel()) { ++ dev = qdev_new(TYPE_KVM_IOAPIC); ++ } else { ++ dev = qdev_new(TYPE_IOAPIC); ++ } ++ object_property_add_child(parent, "ioapic", OBJECT(dev)); ++ d = SYS_BUS_DEVICE(dev); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); ++ ++ for (i = 0; i < IOAPIC_NUM_PINS; i++) { ++ gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); ++ } ++} ++ ++DeviceState *ioapic_init_secondary(GSIState *gsi_state) ++{ ++ DeviceState *dev; ++ SysBusDevice *d; ++ unsigned int i; ++ ++ dev = qdev_new(TYPE_IOAPIC); ++ d = SYS_BUS_DEVICE(dev); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); ++ ++ for (i = 
0; i < IOAPIC_NUM_PINS; i++) { ++ gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); ++ } ++ return dev; ++} ++ ++/* ++ * The entry point into the kernel for PVH boot is different from ++ * the native entry point. The PVH entry is defined by the x86/HVM ++ * direct boot ABI and is available in an ELFNOTE in the kernel binary. ++ * ++ * This function is passed to load_elf() when it is called from ++ * load_elfboot() which then additionally checks for an ELF Note of ++ * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to ++ * parse the PVH entry address from the ELF Note. ++ * ++ * Due to trickery in elf_opts.h, load_elf() is actually available as ++ * load_elf32() or load_elf64() and this routine needs to be able ++ * to deal with being called as 32 or 64 bit. ++ * ++ * The address of the PVH entry point is saved to the 'pvh_start_addr' ++ * global variable. (although the entry point is 32-bit, the kernel ++ * binary can be either 32-bit or 64-bit). ++ */ ++static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) ++{ ++ size_t *elf_note_data_addr; ++ ++ /* Check if ELF Note header passed in is valid */ ++ if (arg1 == NULL) { ++ return 0; ++ } ++ ++ if (is64) { ++ struct elf64_note *nhdr64 = (struct elf64_note *)arg1; ++ uint64_t nhdr_size64 = sizeof(struct elf64_note); ++ uint64_t phdr_align = *(uint64_t *)arg2; ++ uint64_t nhdr_namesz = nhdr64->n_namesz; ++ ++ elf_note_data_addr = ++ ((void *)nhdr64) + nhdr_size64 + ++ QEMU_ALIGN_UP(nhdr_namesz, phdr_align); ++ ++ pvh_start_addr = *elf_note_data_addr; ++ } else { ++ struct elf32_note *nhdr32 = (struct elf32_note *)arg1; ++ uint32_t nhdr_size32 = sizeof(struct elf32_note); ++ uint32_t phdr_align = *(uint32_t *)arg2; ++ uint32_t nhdr_namesz = nhdr32->n_namesz; ++ ++ elf_note_data_addr = ++ ((void *)nhdr32) + nhdr_size32 + ++ QEMU_ALIGN_UP(nhdr_namesz, phdr_align); ++ ++ pvh_start_addr = *(uint32_t *)elf_note_data_addr; ++ } ++ ++ return pvh_start_addr; ++} ++ ++static bool 
load_elfboot(const char *kernel_filename, ++ int kernel_file_size, ++ uint8_t *header, ++ size_t pvh_xen_start_addr, ++ FWCfgState *fw_cfg) ++{ ++ uint32_t flags = 0; ++ uint32_t mh_load_addr = 0; ++ uint32_t elf_kernel_size = 0; ++ uint64_t elf_entry; ++ uint64_t elf_low, elf_high; ++ int kernel_size; ++ ++ if (ldl_p(header) != 0x464c457f) { ++ return false; /* no elfboot */ ++ } ++ ++ bool elf_is64 = header[EI_CLASS] == ELFCLASS64; ++ flags = elf_is64 ? ++ ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; ++ ++ if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ ++ error_report("elfboot unsupported flags = %x", flags); ++ exit(1); ++ } ++ ++ uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; ++ kernel_size = load_elf(kernel_filename, read_pvh_start_addr, ++ NULL, &elf_note_type, &elf_entry, ++ &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, ++ 0, 0); ++ ++ if (kernel_size < 0) { ++ error_report("Error while loading elf kernel"); ++ exit(1); ++ } ++ mh_load_addr = elf_low; ++ elf_kernel_size = elf_high - elf_low; ++ ++ if (pvh_start_addr == 0) { ++ error_report("Error loading uncompressed kernel without PVH ELF Note"); ++ exit(1); ++ } ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); ++ ++ return true; ++} ++ ++void x86_load_linux(X86MachineState *x86ms, ++ FWCfgState *fw_cfg, ++ int acpi_data_size, ++ bool pvh_enabled) ++{ ++ bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; ++ uint16_t protocol; ++ int setup_size, kernel_size, cmdline_size; ++ int dtb_size, setup_data_offset; ++ uint32_t initrd_max; ++ uint8_t header[8192], *setup, *kernel; ++ hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; ++ FILE *f; ++ char *vmode; ++ MachineState *machine = MACHINE(x86ms); ++ struct setup_data *setup_data; ++ const char *kernel_filename = machine->kernel_filename; ++ const 
char *initrd_filename = machine->initrd_filename; ++ const char *dtb_filename = machine->dtb; ++ const char *kernel_cmdline = machine->kernel_cmdline; ++ SevKernelLoaderContext sev_load_ctx = {}; ++ ++ /* Align to 16 bytes as a paranoia measure */ ++ cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; ++ ++ /* load the kernel header */ ++ f = fopen(kernel_filename, "rb"); ++ if (!f) { ++ fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", ++ kernel_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ kernel_size = get_file_size(f); ++ if (!kernel_size || ++ fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != ++ MIN(ARRAY_SIZE(header), kernel_size)) { ++ fprintf(stderr, "qemu: could not load kernel '%s': %s\n", ++ kernel_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ /* kernel protocol version */ ++ if (ldl_p(header + 0x202) == 0x53726448) { ++ protocol = lduw_p(header + 0x206); ++ } else { ++ /* ++ * This could be a multiboot kernel. If it is, let's stop treating it ++ * like a Linux kernel. ++ * Note: some multiboot images could be in the ELF format (the same of ++ * PVH), so we try multiboot first since we check the multiboot magic ++ * header before to load it. ++ */ ++ if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, ++ kernel_cmdline, kernel_size, header)) { ++ return; ++ } ++ /* ++ * Check if the file is an uncompressed kernel file (ELF) and load it, ++ * saving the PVH entry point used by the x86/HVM direct boot ABI. ++ * If load_elfboot() is successful, populate the fw_cfg info. 
++ */ ++ if (pvh_enabled && ++ load_elfboot(kernel_filename, kernel_size, ++ header, pvh_start_addr, fw_cfg)) { ++ fclose(f); ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, ++ strlen(kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, ++ header, sizeof(header)); ++ ++ /* load initrd */ ++ if (initrd_filename) { ++ GMappedFile *mapped_file; ++ gsize initrd_size; ++ gchar *initrd_data; ++ GError *gerr = NULL; ++ ++ mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); ++ if (!mapped_file) { ++ fprintf(stderr, "qemu: error reading initrd %s: %s\n", ++ initrd_filename, gerr->message); ++ exit(1); ++ } ++ x86ms->initrd_mapped_file = mapped_file; ++ ++ initrd_data = g_mapped_file_get_contents(mapped_file); ++ initrd_size = g_mapped_file_get_length(mapped_file); ++ initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; ++ if (initrd_size >= initrd_max) { ++ fprintf(stderr, "qemu: initrd is too large, cannot support." 
++ "(max: %"PRIu32", need %"PRId64")\n", ++ initrd_max, (uint64_t)initrd_size); ++ exit(1); ++ } ++ ++ initrd_addr = (initrd_max - initrd_size) & ~4095; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, ++ initrd_size); ++ } ++ ++ option_rom[nb_option_roms].bootindex = 0; ++ option_rom[nb_option_roms].name = "pvh.bin"; ++ nb_option_roms++; ++ ++ return; ++ } ++ protocol = 0; ++ } ++ ++ if (protocol < 0x200 || !(header[0x211] & 0x01)) { ++ /* Low kernel */ ++ real_addr = 0x90000; ++ cmdline_addr = 0x9a000 - cmdline_size; ++ prot_addr = 0x10000; ++ } else if (protocol < 0x202) { ++ /* High but ancient kernel */ ++ real_addr = 0x90000; ++ cmdline_addr = 0x9a000 - cmdline_size; ++ prot_addr = 0x100000; ++ } else { ++ /* High and recent kernel */ ++ real_addr = 0x10000; ++ cmdline_addr = 0x20000; ++ prot_addr = 0x100000; ++ } ++ ++ /* highest address for loading the initrd */ ++ if (protocol >= 0x20c && ++ lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { ++ /* ++ * Linux has supported initrd up to 4 GB for a very long time (2007, ++ * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), ++ * though it only sets initrd_max to 2 GB to "work around bootloader ++ * bugs". Luckily, QEMU firmware(which does something like bootloader) ++ * has supported this. ++ * ++ * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can ++ * be loaded into any address. ++ * ++ * In addition, initrd_max is uint32_t simply because QEMU doesn't ++ * support the 64-bit boot protocol (specifically the ext_ramdisk_image ++ * field). ++ * ++ * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
++ */ ++ initrd_max = UINT32_MAX; ++ } else if (protocol >= 0x203) { ++ initrd_max = ldl_p(header + 0x22c); ++ } else { ++ initrd_max = 0x37ffffff; ++ } ++ ++ if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { ++ initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; ++ } ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); ++ sev_load_ctx.cmdline_data = (char *)kernel_cmdline; ++ sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; ++ ++ if (protocol >= 0x202) { ++ stl_p(header + 0x228, cmdline_addr); ++ } else { ++ stw_p(header + 0x20, 0xA33F); ++ stw_p(header + 0x22, cmdline_addr - real_addr); ++ } ++ ++ /* handle vga= parameter */ ++ vmode = strstr(kernel_cmdline, "vga="); ++ if (vmode) { ++ unsigned int video_mode; ++ const char *end; ++ int ret; ++ /* skip "vga=" */ ++ vmode += 4; ++ if (!strncmp(vmode, "normal", 6)) { ++ video_mode = 0xffff; ++ } else if (!strncmp(vmode, "ext", 3)) { ++ video_mode = 0xfffe; ++ } else if (!strncmp(vmode, "ask", 3)) { ++ video_mode = 0xfffd; ++ } else { ++ ret = qemu_strtoui(vmode, &end, 0, &video_mode); ++ if (ret != 0 || (*end && *end != ' ')) { ++ fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); ++ exit(1); ++ } ++ } ++ stw_p(header + 0x1fa, video_mode); ++ } ++ ++ /* loader type */ ++ /* ++ * High nybble = B reserved for QEMU; low nybble is revision number. ++ * If this code is substantially changed, you may want to consider ++ * incrementing the revision. 
++ */ ++ if (protocol >= 0x200) { ++ header[0x210] = 0xB0; ++ } ++ /* heap */ ++ if (protocol >= 0x201) { ++ header[0x211] |= 0x80; /* CAN_USE_HEAP */ ++ stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); ++ } ++ ++ /* load initrd */ ++ if (initrd_filename) { ++ GMappedFile *mapped_file; ++ gsize initrd_size; ++ gchar *initrd_data; ++ GError *gerr = NULL; ++ ++ if (protocol < 0x200) { ++ fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); ++ exit(1); ++ } ++ ++ mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); ++ if (!mapped_file) { ++ fprintf(stderr, "qemu: error reading initrd %s: %s\n", ++ initrd_filename, gerr->message); ++ exit(1); ++ } ++ x86ms->initrd_mapped_file = mapped_file; ++ ++ initrd_data = g_mapped_file_get_contents(mapped_file); ++ initrd_size = g_mapped_file_get_length(mapped_file); ++ if (initrd_size >= initrd_max) { ++ fprintf(stderr, "qemu: initrd is too large, cannot support." ++ "(max: %"PRIu32", need %"PRId64")\n", ++ initrd_max, (uint64_t)initrd_size); ++ exit(1); ++ } ++ ++ initrd_addr = (initrd_max - initrd_size) & ~4095; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); ++ sev_load_ctx.initrd_data = initrd_data; ++ sev_load_ctx.initrd_size = initrd_size; ++ ++ stl_p(header + 0x218, initrd_addr); ++ stl_p(header + 0x21c, initrd_size); ++ } ++ ++ /* load kernel and setup */ ++ setup_size = header[0x1f1]; ++ if (setup_size == 0) { ++ setup_size = 4; ++ } ++ setup_size = (setup_size + 1) * 512; ++ if (setup_size > kernel_size) { ++ fprintf(stderr, "qemu: invalid kernel header\n"); ++ exit(1); ++ } ++ kernel_size -= setup_size; ++ ++ setup = g_malloc(setup_size); ++ kernel = g_malloc(kernel_size); ++ fseek(f, 0, SEEK_SET); ++ if (fread(setup, 1, setup_size, f) != setup_size) { ++ fprintf(stderr, "fread() failed\n"); ++ exit(1); ++ } ++ if (fread(kernel, 1, 
kernel_size, f) != kernel_size) { ++ fprintf(stderr, "fread() failed\n"); ++ exit(1); ++ } ++ fclose(f); ++ ++ /* append dtb to kernel */ ++ if (dtb_filename) { ++ if (protocol < 0x209) { ++ fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); ++ exit(1); ++ } ++ ++ dtb_size = get_image_size(dtb_filename); ++ if (dtb_size <= 0) { ++ fprintf(stderr, "qemu: error reading dtb %s: %s\n", ++ dtb_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); ++ kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; ++ kernel = g_realloc(kernel, kernel_size); ++ ++ stq_p(header + 0x250, prot_addr + setup_data_offset); ++ ++ setup_data = (struct setup_data *)(kernel + setup_data_offset); ++ setup_data->next = 0; ++ setup_data->type = cpu_to_le32(SETUP_DTB); ++ setup_data->len = cpu_to_le32(dtb_size); ++ ++ load_image_size(dtb_filename, setup_data->data, dtb_size); ++ } ++ ++ /* ++ * If we're starting an encrypted VM, it will be OVMF based, which uses the ++ * efi stub for booting and doesn't require any values to be placed in the ++ * kernel header. We therefore don't update the header so the hash of the ++ * kernel on the other side of the fw_cfg interface matches the hash of the ++ * file the user passed in. 
++ */ ++ if (!sev_enabled()) { ++ memcpy(setup, header, MIN(sizeof(header), setup_size)); ++ } ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); ++ sev_load_ctx.kernel_data = (char *)kernel; ++ sev_load_ctx.kernel_size = kernel_size; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); ++ sev_load_ctx.setup_data = (char *)setup; ++ sev_load_ctx.setup_size = setup_size; ++ ++ if (sev_enabled()) { ++ sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); ++ } ++ ++ option_rom[nb_option_roms].bootindex = 0; ++ option_rom[nb_option_roms].name = "linuxboot.bin"; ++ if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { ++ option_rom[nb_option_roms].name = "linuxboot_dma.bin"; ++ } ++ nb_option_roms++; ++} ++ ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only) ++{ ++ uint64_t bios_size = memory_region_size(bios); ++ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); ++ ++ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ bios_size - isa_bios_size, isa_bios_size); ++ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, ++ isa_bios, 1); ++ memory_region_set_readonly(isa_bios, read_only); ++} ++ ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, ++ MemoryRegion *rom_memory, bool isapc_ram_fw) ++{ ++ const char *bios_name; ++ char *filename; ++ int bios_size; ++ ssize_t ret; ++ ++ /* BIOS load */ ++ bios_name = MACHINE(x86ms)->firmware ?: default_firmware; ++ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); ++ if (filename) { ++ bios_size = get_image_size(filename); ++ } else { ++ bios_size = -1; ++ } ++ if (bios_size <= 0 || ++ (bios_size % 65536) != 0) { ++ goto 
bios_error; ++ } ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, ++ &error_fatal); ++ if (sev_enabled()) { ++ /* ++ * The concept of a "reset" simply doesn't exist for ++ * confidential computing guests, we have to destroy and ++ * re-launch them instead. So there is no need to register ++ * the firmware as rom to properly re-initialize on reset. ++ * Just go for a straight file load instead. ++ */ ++ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); ++ load_image_size(filename, ptr, bios_size); ++ x86_firmware_configure(ptr, bios_size); ++ } else { ++ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); ++ ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); ++ if (ret != 0) { ++ goto bios_error; ++ } ++ } ++ g_free(filename); ++ ++ /* map the last 128KB of the BIOS in ISA space */ ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); ++ ++ /* map all the bios at the top of memory */ ++ memory_region_add_subregion(rom_memory, ++ (uint32_t)(-bios_size), ++ &x86ms->bios); ++ return; ++ ++bios_error: ++ fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); ++ exit(1); ++} +diff --git a/hw/i386/x86-cpu.c b/hw/i386/x86-cpu.c +new file mode 100644 +index 0000000000..ab2920522d +--- /dev/null ++++ b/hw/i386/x86-cpu.c +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2003-2004 Fabrice Bellard ++ * Copyright (c) 2019, 2024 Red Hat, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "qemu/osdep.h" ++#include "sysemu/whpx.h" ++#include "sysemu/cpu-timers.h" ++#include "trace.h" ++ ++#include "hw/i386/x86.h" ++#include "target/i386/cpu.h" ++#include "hw/intc/i8259.h" ++#include "hw/irq.h" ++#include "sysemu/kvm.h" ++ ++/* TSC handling */ ++uint64_t cpu_get_tsc(CPUX86State *env) ++{ ++ return cpus_get_elapsed_ticks(); ++} ++ ++/* IRQ handling */ ++static void pic_irq_request(void *opaque, int irq, int level) ++{ ++ CPUState *cs = first_cpu; ++ X86CPU *cpu = X86_CPU(cs); ++ ++ trace_x86_pic_interrupt(irq, level); ++ if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && ++ !whpx_apic_in_platform()) { ++ CPU_FOREACH(cs) { ++ cpu = X86_CPU(cs); ++ if (apic_accept_pic_intr(cpu->apic_state)) { ++ apic_deliver_pic_intr(cpu->apic_state, level); ++ } ++ } ++ } else { ++ if (level) { ++ cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ } else { ++ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ } ++ } ++} ++ ++qemu_irq x86_allocate_cpu_irq(void) ++{ ++ return qemu_allocate_irq(pic_irq_request, NULL, 0); ++} ++ ++int cpu_get_pic_interrupt(CPUX86State *env) ++{ ++ X86CPU *cpu = env_archcpu(env); ++ int intno; ++ ++ if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { ++ intno = apic_get_interrupt(cpu->apic_state); ++ if (intno >= 0) { ++ return intno; ++ } ++ /* read the irq from the PIC */ ++ if (!apic_accept_pic_intr(cpu->apic_state)) { ++ return -1; ++ } ++ } ++ ++ intno = pic_read_irq(isa_pic); ++ return intno; ++} ++ ++DeviceState *cpu_get_current_apic(void) ++{ ++ if (current_cpu) { ++ X86CPU *cpu = X86_CPU(current_cpu); ++ return cpu->apic_state; ++ } else { ++ return NULL; ++ } ++} +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index fcef652c1e..0b5cc59956 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -22,52 +22,25 @@ + */ + #include "qemu/osdep.h" + #include "qemu/error-report.h" +-#include "qemu/option.h" +-#include "qemu/cutils.h" + #include "qemu/units.h" +-#include "qemu/datadir.h" + #include "qapi/error.h" 
+ #include "qapi/qapi-visit-common.h" +-#include "qapi/clone-visitor.h" + #include "qapi/qapi-visit-machine.h" + #include "qapi/visitor.h" + #include "sysemu/qtest.h" +-#include "sysemu/whpx.h" + #include "sysemu/numa.h" +-#include "sysemu/replay.h" +-#include "sysemu/sysemu.h" +-#include "sysemu/cpu-timers.h" +-#include "sysemu/xen.h" + #include "trace.h" + ++#include "hw/acpi/aml-build.h" + #include "hw/i386/x86.h" +-#include "target/i386/cpu.h" + #include "hw/i386/topology.h" +-#include "hw/i386/fw_cfg.h" +-#include "hw/intc/i8259.h" +-#include "hw/rtc/mc146818rtc.h" +-#include "target/i386/sev.h" + +-#include "hw/acpi/cpu_hotplug.h" +-#include "hw/irq.h" + #include "hw/nmi.h" +-#include "hw/loader.h" +-#include "multiboot.h" +-#include "elf.h" +-#include "standard-headers/asm-x86/bootparam.h" +-#include CONFIG_DEVICES + #include "kvm/kvm_i386.h" + +-#ifdef CONFIG_XEN_EMU +-#include "hw/xen/xen.h" +-#include "hw/i386/kvm/xen_evtchn.h" +-#endif + +-/* Physical Address of PVH entry point read from kernel ELF NOTE */ +-static size_t pvh_start_addr; +- +-static void init_topo_info(X86CPUTopoInfo *topo_info, +- const X86MachineState *x86ms) ++void init_topo_info(X86CPUTopoInfo *topo_info, ++ const X86MachineState *x86ms) + { + MachineState *ms = MACHINE(x86ms); + +@@ -94,355 +67,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + return x86_apicid_from_cpu_idx(&topo_info, cpu_index); + } + +- +-void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) +-{ +- Object *cpu = object_new(MACHINE(x86ms)->cpu_type); +- +- if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { +- goto out; +- } +- qdev_realize(DEVICE(cpu), NULL, errp); +- +-out: +- object_unref(cpu); +-} +- +-void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) +-{ +- int i; +- const CPUArchIdList *possible_cpus; +- MachineState *ms = MACHINE(x86ms); +- MachineClass *mc = MACHINE_GET_CLASS(x86ms); +- +- x86_cpu_set_default_version(default_cpu_version); +- 
+- /* +- * Calculates the limit to CPU APIC ID values +- * +- * Limit for the APIC ID value, so that all +- * CPU APIC IDs are < x86ms->apic_id_limit. +- * +- * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). +- */ +- x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, +- ms->smp.max_cpus - 1) + 1; +- +- /* +- * Can we support APIC ID 255 or higher? With KVM, that requires +- * both in-kernel lapic and X2APIC userspace API. +- * +- * kvm_enabled() must go first to ensure that kvm_* references are +- * not emitted for the linker to consume (kvm_enabled() is +- * a literal `0` in configurations where kvm_* aren't defined) +- */ +- if (kvm_enabled() && x86ms->apic_id_limit > 255 && +- kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { +- error_report("current -smp configuration requires kernel " +- "irqchip and X2APIC API support."); +- exit(EXIT_FAILURE); +- } +- +- if (kvm_enabled()) { +- kvm_set_max_apic_id(x86ms->apic_id_limit); +- } +- +- if (!kvm_irqchip_in_kernel()) { +- apic_set_max_apic_id(x86ms->apic_id_limit); +- } +- +- possible_cpus = mc->possible_cpu_arch_ids(ms); +- for (i = 0; i < ms->smp.cpus; i++) { +- x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); +- } +-} +- +-void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) +-{ +- MC146818RtcState *rtc = MC146818_RTC(s); +- +- if (cpus_count > 0xff) { +- /* +- * If the number of CPUs can't be represented in 8 bits, the +- * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just +- * to make old BIOSes fail more predictably. 
+- */ +- mc146818rtc_set_cmos_data(rtc, 0x5f, 0); +- } else { +- mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); +- } +-} +- +-static int x86_apic_cmp(const void *a, const void *b) +-{ +- CPUArchId *apic_a = (CPUArchId *)a; +- CPUArchId *apic_b = (CPUArchId *)b; +- +- return apic_a->arch_id - apic_b->arch_id; +-} +- +-/* +- * returns pointer to CPUArchId descriptor that matches CPU's apic_id +- * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no +- * entry corresponding to CPU's apic_id returns NULL. +- */ +-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) +-{ +- CPUArchId apic_id, *found_cpu; +- +- apic_id.arch_id = id; +- found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, +- ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), +- x86_apic_cmp); +- if (found_cpu && idx) { +- *idx = found_cpu - ms->possible_cpus->cpus; +- } +- return found_cpu; +-} +- +-void x86_cpu_plug(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- CPUArchId *found_cpu; +- Error *local_err = NULL; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- if (x86ms->acpi_dev) { +- hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); +- if (local_err) { +- goto out; +- } +- } +- +- /* increment the number of CPUs */ +- x86ms->boot_cpus++; +- if (x86ms->rtc) { +- x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); +- } +- if (x86ms->fw_cfg) { +- fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +- } +- +- found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); +- found_cpu->cpu = CPU(dev); +-out: +- error_propagate(errp, local_err); +-} +- +-void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- int idx = -1; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- if (!x86ms->acpi_dev) { +- error_setg(errp, "CPU hot unplug not supported without ACPI"); +- return; +- } +- +- 
x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); +- assert(idx != -1); +- if (idx == 0) { +- error_setg(errp, "Boot CPU is unpluggable"); +- return; +- } +- +- hotplug_handler_unplug_request(x86ms->acpi_dev, dev, +- errp); +-} +- +-void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- CPUArchId *found_cpu; +- Error *local_err = NULL; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); +- if (local_err) { +- goto out; +- } +- +- found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); +- found_cpu->cpu = NULL; +- qdev_unrealize(dev); +- +- /* decrement the number of CPUs */ +- x86ms->boot_cpus--; +- /* Update the number of CPUs in CMOS */ +- x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); +- fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +- out: +- error_propagate(errp, local_err); +-} +- +-void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- int idx; +- CPUState *cs; +- CPUArchId *cpu_slot; +- X86CPUTopoIDs topo_ids; +- X86CPU *cpu = X86_CPU(dev); +- CPUX86State *env = &cpu->env; +- MachineState *ms = MACHINE(hotplug_dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- unsigned int smp_cores = ms->smp.cores; +- unsigned int smp_threads = ms->smp.threads; +- X86CPUTopoInfo topo_info; +- +- if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { +- error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +- ms->cpu_type); +- return; +- } +- +- if (x86ms->acpi_dev) { +- Error *local_err = NULL; +- +- hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, +- &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- return; +- } +- } +- +- init_topo_info(&topo_info, x86ms); +- +- env->nr_dies = ms->smp.dies; +- +- /* +- * If APIC ID is not set, +- * set it based on socket/die/core/thread properties. 
+- */ +- if (cpu->apic_id == UNASSIGNED_APIC_ID) { +- int max_socket = (ms->smp.max_cpus - 1) / +- smp_threads / smp_cores / ms->smp.dies; +- +- /* +- * die-id was optional in QEMU 4.0 and older, so keep it optional +- * if there's only one die per socket. +- */ +- if (cpu->die_id < 0 && ms->smp.dies == 1) { +- cpu->die_id = 0; +- } +- +- if (cpu->socket_id < 0) { +- error_setg(errp, "CPU socket-id is not set"); +- return; +- } else if (cpu->socket_id > max_socket) { +- error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", +- cpu->socket_id, max_socket); +- return; +- } +- if (cpu->die_id < 0) { +- error_setg(errp, "CPU die-id is not set"); +- return; +- } else if (cpu->die_id > ms->smp.dies - 1) { +- error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", +- cpu->die_id, ms->smp.dies - 1); +- return; +- } +- if (cpu->core_id < 0) { +- error_setg(errp, "CPU core-id is not set"); +- return; +- } else if (cpu->core_id > (smp_cores - 1)) { +- error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", +- cpu->core_id, smp_cores - 1); +- return; +- } +- if (cpu->thread_id < 0) { +- error_setg(errp, "CPU thread-id is not set"); +- return; +- } else if (cpu->thread_id > (smp_threads - 1)) { +- error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", +- cpu->thread_id, smp_threads - 1); +- return; +- } +- +- topo_ids.pkg_id = cpu->socket_id; +- topo_ids.die_id = cpu->die_id; +- topo_ids.core_id = cpu->core_id; +- topo_ids.smt_id = cpu->thread_id; +- cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); +- } +- +- cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); +- if (!cpu_slot) { +- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); +- error_setg(errp, +- "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" +- " APIC ID %" PRIu32 ", valid index range 0:%d", +- topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, +- cpu->apic_id, ms->possible_cpus->len - 1); +- return; 
+- } +- +- if (cpu_slot->cpu) { +- error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", +- idx, cpu->apic_id); +- return; +- } +- +- /* if 'address' properties socket-id/core-id/thread-id are not set, set them +- * so that machine_query_hotpluggable_cpus would show correct values +- */ +- /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() +- * once -smp refactoring is complete and there will be CPU private +- * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ +- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); +- if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { +- error_setg(errp, "property socket-id: %u doesn't match set apic-id:" +- " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, +- topo_ids.pkg_id); +- return; +- } +- cpu->socket_id = topo_ids.pkg_id; +- +- if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { +- error_setg(errp, "property die-id: %u doesn't match set apic-id:" +- " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); +- return; +- } +- cpu->die_id = topo_ids.die_id; +- +- if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { +- error_setg(errp, "property core-id: %u doesn't match set apic-id:" +- " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, +- topo_ids.core_id); +- return; +- } +- cpu->core_id = topo_ids.core_id; +- +- if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { +- error_setg(errp, "property thread-id: %u doesn't match set apic-id:" +- " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, +- topo_ids.smt_id); +- return; +- } +- cpu->thread_id = topo_ids.smt_id; +- +- /* +- * kvm_enabled() must go first to ensure that kvm_* references are +- * not emitted for the linker to consume (kvm_enabled() is +- * a literal `0` in configurations where kvm_* aren't defined) +- */ +- if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && +- !kvm_hv_vpindex_settable()) { +- error_setg(errp, "kernel 
doesn't allow setting HyperV VP_INDEX"); +- return; +- } +- +- cs = CPU(cpu); +- cs->cpu_index = idx; +- +- numa_cpu_pre_plug(cpu_slot, dev, errp); +-} +- + static CpuInstanceProperties + x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + { +@@ -528,676 +152,6 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) + } + } + +-static long get_file_size(FILE *f) +-{ +- long where, size; +- +- /* XXX: on Unix systems, using fstat() probably makes more sense */ +- +- where = ftell(f); +- fseek(f, 0, SEEK_END); +- size = ftell(f); +- fseek(f, where, SEEK_SET); +- +- return size; +-} +- +-/* TSC handling */ +-uint64_t cpu_get_tsc(CPUX86State *env) +-{ +- return cpus_get_elapsed_ticks(); +-} +- +-/* IRQ handling */ +-static void pic_irq_request(void *opaque, int irq, int level) +-{ +- CPUState *cs = first_cpu; +- X86CPU *cpu = X86_CPU(cs); +- +- trace_x86_pic_interrupt(irq, level); +- if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && +- !whpx_apic_in_platform()) { +- CPU_FOREACH(cs) { +- cpu = X86_CPU(cs); +- if (apic_accept_pic_intr(cpu->apic_state)) { +- apic_deliver_pic_intr(cpu->apic_state, level); +- } +- } +- } else { +- if (level) { +- cpu_interrupt(cs, CPU_INTERRUPT_HARD); +- } else { +- cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); +- } +- } +-} +- +-qemu_irq x86_allocate_cpu_irq(void) +-{ +- return qemu_allocate_irq(pic_irq_request, NULL, 0); +-} +- +-int cpu_get_pic_interrupt(CPUX86State *env) +-{ +- X86CPU *cpu = env_archcpu(env); +- int intno; +- +- if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { +- intno = apic_get_interrupt(cpu->apic_state); +- if (intno >= 0) { +- return intno; +- } +- /* read the irq from the PIC */ +- if (!apic_accept_pic_intr(cpu->apic_state)) { +- return -1; +- } +- } +- +- intno = pic_read_irq(isa_pic); +- return intno; +-} +- +-DeviceState *cpu_get_current_apic(void) +-{ +- if (current_cpu) { +- X86CPU *cpu = X86_CPU(current_cpu); +- return cpu->apic_state; +- } else { +- return 
NULL; +- } +-} +- +-void gsi_handler(void *opaque, int n, int level) +-{ +- GSIState *s = opaque; +- +- trace_x86_gsi_interrupt(n, level); +- switch (n) { +- case 0 ... ISA_NUM_IRQS - 1: +- if (s->i8259_irq[n]) { +- /* Under KVM, Kernel will forward to both PIC and IOAPIC */ +- qemu_set_irq(s->i8259_irq[n], level); +- } +- /* fall through */ +- case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: +-#ifdef CONFIG_XEN_EMU +- /* +- * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC +- * routing actually works properly under Xen). And then to +- * *either* the PIRQ handling or the I/OAPIC depending on +- * whether the former wants it. +- */ +- if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { +- break; +- } +-#endif +- qemu_set_irq(s->ioapic_irq[n], level); +- break; +- case IO_APIC_SECONDARY_IRQBASE +- ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: +- qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); +- break; +- } +-} +- +-void ioapic_init_gsi(GSIState *gsi_state, Object *parent) +-{ +- DeviceState *dev; +- SysBusDevice *d; +- unsigned int i; +- +- assert(parent); +- if (kvm_ioapic_in_kernel()) { +- dev = qdev_new(TYPE_KVM_IOAPIC); +- } else { +- dev = qdev_new(TYPE_IOAPIC); +- } +- object_property_add_child(parent, "ioapic", OBJECT(dev)); +- d = SYS_BUS_DEVICE(dev); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); +- +- for (i = 0; i < IOAPIC_NUM_PINS; i++) { +- gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); +- } +-} +- +-DeviceState *ioapic_init_secondary(GSIState *gsi_state) +-{ +- DeviceState *dev; +- SysBusDevice *d; +- unsigned int i; +- +- dev = qdev_new(TYPE_IOAPIC); +- d = SYS_BUS_DEVICE(dev); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); +- +- for (i = 0; i < IOAPIC_NUM_PINS; i++) { +- gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); +- } +- return dev; +-} +- +-/* +- * The entry point into the kernel for PVH 
boot is different from +- * the native entry point. The PVH entry is defined by the x86/HVM +- * direct boot ABI and is available in an ELFNOTE in the kernel binary. +- * +- * This function is passed to load_elf() when it is called from +- * load_elfboot() which then additionally checks for an ELF Note of +- * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to +- * parse the PVH entry address from the ELF Note. +- * +- * Due to trickery in elf_opts.h, load_elf() is actually available as +- * load_elf32() or load_elf64() and this routine needs to be able +- * to deal with being called as 32 or 64 bit. +- * +- * The address of the PVH entry point is saved to the 'pvh_start_addr' +- * global variable. (although the entry point is 32-bit, the kernel +- * binary can be either 32-bit or 64-bit). +- */ +-static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) +-{ +- size_t *elf_note_data_addr; +- +- /* Check if ELF Note header passed in is valid */ +- if (arg1 == NULL) { +- return 0; +- } +- +- if (is64) { +- struct elf64_note *nhdr64 = (struct elf64_note *)arg1; +- uint64_t nhdr_size64 = sizeof(struct elf64_note); +- uint64_t phdr_align = *(uint64_t *)arg2; +- uint64_t nhdr_namesz = nhdr64->n_namesz; +- +- elf_note_data_addr = +- ((void *)nhdr64) + nhdr_size64 + +- QEMU_ALIGN_UP(nhdr_namesz, phdr_align); +- +- pvh_start_addr = *elf_note_data_addr; +- } else { +- struct elf32_note *nhdr32 = (struct elf32_note *)arg1; +- uint32_t nhdr_size32 = sizeof(struct elf32_note); +- uint32_t phdr_align = *(uint32_t *)arg2; +- uint32_t nhdr_namesz = nhdr32->n_namesz; +- +- elf_note_data_addr = +- ((void *)nhdr32) + nhdr_size32 + +- QEMU_ALIGN_UP(nhdr_namesz, phdr_align); +- +- pvh_start_addr = *(uint32_t *)elf_note_data_addr; +- } +- +- return pvh_start_addr; +-} +- +-static bool load_elfboot(const char *kernel_filename, +- int kernel_file_size, +- uint8_t *header, +- size_t pvh_xen_start_addr, +- FWCfgState *fw_cfg) +-{ +- uint32_t flags = 0; +- uint32_t 
mh_load_addr = 0; +- uint32_t elf_kernel_size = 0; +- uint64_t elf_entry; +- uint64_t elf_low, elf_high; +- int kernel_size; +- +- if (ldl_p(header) != 0x464c457f) { +- return false; /* no elfboot */ +- } +- +- bool elf_is64 = header[EI_CLASS] == ELFCLASS64; +- flags = elf_is64 ? +- ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; +- +- if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ +- error_report("elfboot unsupported flags = %x", flags); +- exit(1); +- } +- +- uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; +- kernel_size = load_elf(kernel_filename, read_pvh_start_addr, +- NULL, &elf_note_type, &elf_entry, +- &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, +- 0, 0); +- +- if (kernel_size < 0) { +- error_report("Error while loading elf kernel"); +- exit(1); +- } +- mh_load_addr = elf_low; +- elf_kernel_size = elf_high - elf_low; +- +- if (pvh_start_addr == 0) { +- error_report("Error loading uncompressed kernel without PVH ELF Note"); +- exit(1); +- } +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); +- +- return true; +-} +- +-void x86_load_linux(X86MachineState *x86ms, +- FWCfgState *fw_cfg, +- int acpi_data_size, +- bool pvh_enabled) +-{ +- bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; +- uint16_t protocol; +- int setup_size, kernel_size, cmdline_size; +- int dtb_size, setup_data_offset; +- uint32_t initrd_max; +- uint8_t header[8192], *setup, *kernel; +- hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; +- FILE *f; +- char *vmode; +- MachineState *machine = MACHINE(x86ms); +- struct setup_data *setup_data; +- const char *kernel_filename = machine->kernel_filename; +- const char *initrd_filename = machine->initrd_filename; +- const char *dtb_filename = machine->dtb; +- const char *kernel_cmdline = machine->kernel_cmdline; +- SevKernelLoaderContext 
sev_load_ctx = {}; +- +- /* Align to 16 bytes as a paranoia measure */ +- cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; +- +- /* load the kernel header */ +- f = fopen(kernel_filename, "rb"); +- if (!f) { +- fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", +- kernel_filename, strerror(errno)); +- exit(1); +- } +- +- kernel_size = get_file_size(f); +- if (!kernel_size || +- fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != +- MIN(ARRAY_SIZE(header), kernel_size)) { +- fprintf(stderr, "qemu: could not load kernel '%s': %s\n", +- kernel_filename, strerror(errno)); +- exit(1); +- } +- +- /* kernel protocol version */ +- if (ldl_p(header + 0x202) == 0x53726448) { +- protocol = lduw_p(header + 0x206); +- } else { +- /* +- * This could be a multiboot kernel. If it is, let's stop treating it +- * like a Linux kernel. +- * Note: some multiboot images could be in the ELF format (the same of +- * PVH), so we try multiboot first since we check the multiboot magic +- * header before to load it. +- */ +- if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, +- kernel_cmdline, kernel_size, header)) { +- return; +- } +- /* +- * Check if the file is an uncompressed kernel file (ELF) and load it, +- * saving the PVH entry point used by the x86/HVM direct boot ABI. +- * If load_elfboot() is successful, populate the fw_cfg info. 
+- */ +- if (pvh_enabled && +- load_elfboot(kernel_filename, kernel_size, +- header, pvh_start_addr, fw_cfg)) { +- fclose(f); +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, +- strlen(kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, +- header, sizeof(header)); +- +- /* load initrd */ +- if (initrd_filename) { +- GMappedFile *mapped_file; +- gsize initrd_size; +- gchar *initrd_data; +- GError *gerr = NULL; +- +- mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); +- if (!mapped_file) { +- fprintf(stderr, "qemu: error reading initrd %s: %s\n", +- initrd_filename, gerr->message); +- exit(1); +- } +- x86ms->initrd_mapped_file = mapped_file; +- +- initrd_data = g_mapped_file_get_contents(mapped_file); +- initrd_size = g_mapped_file_get_length(mapped_file); +- initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; +- if (initrd_size >= initrd_max) { +- fprintf(stderr, "qemu: initrd is too large, cannot support." 
+- "(max: %"PRIu32", need %"PRId64")\n", +- initrd_max, (uint64_t)initrd_size); +- exit(1); +- } +- +- initrd_addr = (initrd_max - initrd_size) & ~4095; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, +- initrd_size); +- } +- +- option_rom[nb_option_roms].bootindex = 0; +- option_rom[nb_option_roms].name = "pvh.bin"; +- nb_option_roms++; +- +- return; +- } +- protocol = 0; +- } +- +- if (protocol < 0x200 || !(header[0x211] & 0x01)) { +- /* Low kernel */ +- real_addr = 0x90000; +- cmdline_addr = 0x9a000 - cmdline_size; +- prot_addr = 0x10000; +- } else if (protocol < 0x202) { +- /* High but ancient kernel */ +- real_addr = 0x90000; +- cmdline_addr = 0x9a000 - cmdline_size; +- prot_addr = 0x100000; +- } else { +- /* High and recent kernel */ +- real_addr = 0x10000; +- cmdline_addr = 0x20000; +- prot_addr = 0x100000; +- } +- +- /* highest address for loading the initrd */ +- if (protocol >= 0x20c && +- lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { +- /* +- * Linux has supported initrd up to 4 GB for a very long time (2007, +- * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), +- * though it only sets initrd_max to 2 GB to "work around bootloader +- * bugs". Luckily, QEMU firmware(which does something like bootloader) +- * has supported this. +- * +- * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can +- * be loaded into any address. +- * +- * In addition, initrd_max is uint32_t simply because QEMU doesn't +- * support the 64-bit boot protocol (specifically the ext_ramdisk_image +- * field). +- * +- * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
+- */ +- initrd_max = UINT32_MAX; +- } else if (protocol >= 0x203) { +- initrd_max = ldl_p(header + 0x22c); +- } else { +- initrd_max = 0x37ffffff; +- } +- +- if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { +- initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; +- } +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); +- sev_load_ctx.cmdline_data = (char *)kernel_cmdline; +- sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; +- +- if (protocol >= 0x202) { +- stl_p(header + 0x228, cmdline_addr); +- } else { +- stw_p(header + 0x20, 0xA33F); +- stw_p(header + 0x22, cmdline_addr - real_addr); +- } +- +- /* handle vga= parameter */ +- vmode = strstr(kernel_cmdline, "vga="); +- if (vmode) { +- unsigned int video_mode; +- const char *end; +- int ret; +- /* skip "vga=" */ +- vmode += 4; +- if (!strncmp(vmode, "normal", 6)) { +- video_mode = 0xffff; +- } else if (!strncmp(vmode, "ext", 3)) { +- video_mode = 0xfffe; +- } else if (!strncmp(vmode, "ask", 3)) { +- video_mode = 0xfffd; +- } else { +- ret = qemu_strtoui(vmode, &end, 0, &video_mode); +- if (ret != 0 || (*end && *end != ' ')) { +- fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); +- exit(1); +- } +- } +- stw_p(header + 0x1fa, video_mode); +- } +- +- /* loader type */ +- /* +- * High nybble = B reserved for QEMU; low nybble is revision number. +- * If this code is substantially changed, you may want to consider +- * incrementing the revision. 
+- */ +- if (protocol >= 0x200) { +- header[0x210] = 0xB0; +- } +- /* heap */ +- if (protocol >= 0x201) { +- header[0x211] |= 0x80; /* CAN_USE_HEAP */ +- stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); +- } +- +- /* load initrd */ +- if (initrd_filename) { +- GMappedFile *mapped_file; +- gsize initrd_size; +- gchar *initrd_data; +- GError *gerr = NULL; +- +- if (protocol < 0x200) { +- fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); +- exit(1); +- } +- +- mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); +- if (!mapped_file) { +- fprintf(stderr, "qemu: error reading initrd %s: %s\n", +- initrd_filename, gerr->message); +- exit(1); +- } +- x86ms->initrd_mapped_file = mapped_file; +- +- initrd_data = g_mapped_file_get_contents(mapped_file); +- initrd_size = g_mapped_file_get_length(mapped_file); +- if (initrd_size >= initrd_max) { +- fprintf(stderr, "qemu: initrd is too large, cannot support." +- "(max: %"PRIu32", need %"PRId64")\n", +- initrd_max, (uint64_t)initrd_size); +- exit(1); +- } +- +- initrd_addr = (initrd_max - initrd_size) & ~4095; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); +- sev_load_ctx.initrd_data = initrd_data; +- sev_load_ctx.initrd_size = initrd_size; +- +- stl_p(header + 0x218, initrd_addr); +- stl_p(header + 0x21c, initrd_size); +- } +- +- /* load kernel and setup */ +- setup_size = header[0x1f1]; +- if (setup_size == 0) { +- setup_size = 4; +- } +- setup_size = (setup_size + 1) * 512; +- if (setup_size > kernel_size) { +- fprintf(stderr, "qemu: invalid kernel header\n"); +- exit(1); +- } +- kernel_size -= setup_size; +- +- setup = g_malloc(setup_size); +- kernel = g_malloc(kernel_size); +- fseek(f, 0, SEEK_SET); +- if (fread(setup, 1, setup_size, f) != setup_size) { +- fprintf(stderr, "fread() failed\n"); +- exit(1); +- } +- if (fread(kernel, 1, 
kernel_size, f) != kernel_size) { +- fprintf(stderr, "fread() failed\n"); +- exit(1); +- } +- fclose(f); +- +- /* append dtb to kernel */ +- if (dtb_filename) { +- if (protocol < 0x209) { +- fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); +- exit(1); +- } +- +- dtb_size = get_image_size(dtb_filename); +- if (dtb_size <= 0) { +- fprintf(stderr, "qemu: error reading dtb %s: %s\n", +- dtb_filename, strerror(errno)); +- exit(1); +- } +- +- setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); +- kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; +- kernel = g_realloc(kernel, kernel_size); +- +- stq_p(header + 0x250, prot_addr + setup_data_offset); +- +- setup_data = (struct setup_data *)(kernel + setup_data_offset); +- setup_data->next = 0; +- setup_data->type = cpu_to_le32(SETUP_DTB); +- setup_data->len = cpu_to_le32(dtb_size); +- +- load_image_size(dtb_filename, setup_data->data, dtb_size); +- } +- +- /* +- * If we're starting an encrypted VM, it will be OVMF based, which uses the +- * efi stub for booting and doesn't require any values to be placed in the +- * kernel header. We therefore don't update the header so the hash of the +- * kernel on the other side of the fw_cfg interface matches the hash of the +- * file the user passed in. 
+- */ +- if (!sev_enabled()) { +- memcpy(setup, header, MIN(sizeof(header), setup_size)); +- } +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); +- sev_load_ctx.kernel_data = (char *)kernel; +- sev_load_ctx.kernel_size = kernel_size; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); +- sev_load_ctx.setup_data = (char *)setup; +- sev_load_ctx.setup_size = setup_size; +- +- if (sev_enabled()) { +- sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); +- } +- +- option_rom[nb_option_roms].bootindex = 0; +- option_rom[nb_option_roms].name = "linuxboot.bin"; +- if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { +- option_rom[nb_option_roms].name = "linuxboot_dma.bin"; +- } +- nb_option_roms++; +-} +- +-void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, +- MemoryRegion *bios, bool read_only) +-{ +- uint64_t bios_size = memory_region_size(bios); +- uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); +- +- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, +- bios_size - isa_bios_size, isa_bios_size); +- memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, +- isa_bios, 1); +- memory_region_set_readonly(isa_bios, read_only); +-} +- +-void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, +- MemoryRegion *rom_memory, bool isapc_ram_fw) +-{ +- const char *bios_name; +- char *filename; +- int bios_size; +- ssize_t ret; +- +- /* BIOS load */ +- bios_name = MACHINE(x86ms)->firmware ?: default_firmware; +- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); +- if (filename) { +- bios_size = get_image_size(filename); +- } else { +- bios_size = -1; +- } +- if (bios_size <= 0 || +- (bios_size % 65536) != 0) { +- goto 
bios_error; +- } +- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, +- &error_fatal); +- if (sev_enabled()) { +- /* +- * The concept of a "reset" simply doesn't exist for +- * confidential computing guests, we have to destroy and +- * re-launch them instead. So there is no need to register +- * the firmware as rom to properly re-initialize on reset. +- * Just go for a straight file load instead. +- */ +- void *ptr = memory_region_get_ram_ptr(&x86ms->bios); +- load_image_size(filename, ptr, bios_size); +- x86_firmware_configure(ptr, bios_size); +- } else { +- memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); +- ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); +- if (ret != 0) { +- goto bios_error; +- } +- } +- g_free(filename); +- +- /* map the last 128KB of the BIOS in ISA space */ +- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, +- !isapc_ram_fw); +- +- /* map all the bios at the top of memory */ +- memory_region_add_subregion(rom_memory, +- (uint32_t)(-bios_size), +- &x86ms->bios); +- return; +- +-bios_error: +- fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); +- exit(1); +-} +- + bool x86_machine_is_smm_enabled(const X86MachineState *x86ms) + { + bool smm_available = false; +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index c2062db13f..b006f16b8d 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -21,6 +21,7 @@ + #include "exec/memory.h" + + #include "hw/boards.h" ++#include "hw/i386/topology.h" + #include "hw/intc/ioapic.h" + #include "hw/isa/isa.h" + #include "qom/object.h" +@@ -109,12 +110,11 @@ struct X86MachineState { + #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") + OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) + +-uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, ++void init_topo_info(X86CPUTopoInfo *topo_info, const X86MachineState *x86ms); ++uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + unsigned 
int cpu_index); + +-void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); + void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); + void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); + void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch b/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch new file mode 100644 index 0000000..38fd870 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch @@ -0,0 +1,133 @@ +From ebf08d2a822576acfa60fbd5f552d26de1e4c4be Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:04 +0200 +Subject: [PATCH 040/100] hw/i386/x86: Don't leak "isa-bios" memory regions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [40/91] bb595357c6cc2d5a80bf3873853c69553c5feee5 (bonzini/rhel-qemu-kvm) + +Fix the leaking in x86_bios_rom_init() and pc_isa_bios_init() by adding an +"isa_bios" attribute to X86MachineState. 
+ +Suggested-by: Philippe Mathieu-Daudé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-4-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 32d3ee87a17fc91e981a23dba94855bff89f5920) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 7 +++---- + hw/i386/x86.c | 9 ++++----- + include/hw/i386/x86.h | 7 +++++++ + 3 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 59c7a81692..82d37cb376 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -40,11 +40,10 @@ + + #define FLASH_SECTOR_SIZE 4096 + +-static void pc_isa_bios_init(MemoryRegion *rom_memory, ++static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + MemoryRegion *flash_mem) + { + int isa_bios_size; +- MemoryRegion *isa_bios; + uint64_t flash_size; + void *flash_ptr, *isa_bios_ptr; + +@@ -52,7 +51,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * KiB); +- isa_bios = g_malloc(sizeof(*isa_bios)); + memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, + &error_fatal); + memory_region_add_subregion_overlap(rom_memory, +@@ -136,6 +134,7 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) + static void pc_system_flash_map(PCMachineState *pcms, + MemoryRegion *rom_memory) + { ++ X86MachineState *x86ms = X86_MACHINE(pcms); + hwaddr total_size = 0; + int i; + BlockBackend *blk; +@@ -185,7 +184,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(rom_memory, flash_mem); ++ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 6d3c72f124..457e8a34a5 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c 
+@@ -1133,7 +1133,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + { + const char *bios_name; + char *filename; +- MemoryRegion *bios, *isa_bios; ++ MemoryRegion *bios; + int bios_size, isa_bios_size; + ssize_t ret; + +@@ -1173,14 +1173,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); +- isa_bios = g_malloc(sizeof(*isa_bios)); +- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, +- isa_bios, ++ &x86ms->isa_bios, + 1); +- memory_region_set_readonly(isa_bios, !isapc_ram_fw); ++ memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index cb07618d19..a07de79167 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -18,6 +18,7 @@ + #define HW_I386_X86_H + + #include "exec/hwaddr.h" ++#include "exec/memory.h" + + #include "hw/boards.h" + #include "hw/intc/ioapic.h" +@@ -52,6 +53,12 @@ struct X86MachineState { + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + ++ /* ++ * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address ++ * boundary. 
++ */ ++ MemoryRegion isa_bios; ++ + /* RAM information (sizes, addresses, configuration): */ + ram_addr_t below_4g_mem_size, above_4g_mem_size; + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch b/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch new file mode 100644 index 0000000..7a61f95 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch @@ -0,0 +1,105 @@ +From e1f2265b5f6bf5b63bf3808bb540888f3cf8badb Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:05 +0200 +Subject: [PATCH 041/100] hw/i386/x86: Don't leak "pc.bios" memory region +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [41/91] a9cd61d8d240134c09c46e244efb89217cadf60c (bonzini/rhel-qemu-kvm) + +Fix the leaking in x86_bios_rom_init() by adding a "bios" attribute to +X86MachineState. Note that it is only used in the -bios case. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-5-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 865d95321ffc8d9941e33000b10140550f094556) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 13 ++++++------- + include/hw/i386/x86.h | 6 ++++++ + 2 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 457e8a34a5..29167de97d 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1133,7 +1133,6 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + { + const char *bios_name; + char *filename; +- MemoryRegion *bios; + int bios_size, isa_bios_size; + ssize_t ret; + +@@ -1149,8 +1148,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + (bios_size % 65536) != 0) { + goto bios_error; + } +- bios = g_malloc(sizeof(*bios)); +- memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, ++ &error_fatal); + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for +@@ -1159,11 +1158,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + * the firmware as rom to properly re-initialize on reset. + * Just go for a straight file load instead. 
+ */ +- void *ptr = memory_region_get_ram_ptr(bios); ++ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { +- memory_region_set_readonly(bios, !isapc_ram_fw); ++ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; +@@ -1173,7 +1172,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); +- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, ++ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, +@@ -1184,7 +1183,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, + (uint32_t)(-bios_size), +- bios); ++ &x86ms->bios); + return; + + bios_error: +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index a07de79167..55c6809ae0 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -53,6 +53,12 @@ struct X86MachineState { + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + ++ /* ++ * Map the whole BIOS just underneath the 4 GiB address boundary. Only used ++ * in the ROM (-bios) case. ++ */ ++ MemoryRegion bios; ++ + /* + * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address + * boundary. 
+-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch b/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch new file mode 100644 index 0000000..b9c18e7 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch @@ -0,0 +1,69 @@ +From b9d0c78f04160fbc1eee6cfd94b17f1133a35d83 Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Tue, 30 Apr 2024 17:06:38 +0200 +Subject: [PATCH 037/100] hw/i386/x86: Eliminate two if statements in + x86_bios_rom_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [37/91] 1ef6a13214e85f6ef773f5c894c720f20330912b (bonzini/rhel-qemu-kvm) + +Given that memory_region_set_readonly() is a no-op when the readonlyness is +already as requested it is possible to simplify the pattern + + if (condition) { + foo(true); + } + +to + + foo(condition); + +which is shorter and allows to see the invariant of the code more easily. 
+ +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240430150643.111976-2-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 014dbdac8798799d081abc9dff3e4876ca54f49e) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 3d5b51e92d..2a4f3ee285 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1163,9 +1163,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { +- if (!isapc_ram_fw) { +- memory_region_set_readonly(bios, true); +- } ++ memory_region_set_readonly(bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; +@@ -1182,9 +1180,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + 0x100000 - isa_bios_size, + isa_bios, + 1); +- if (!isapc_ram_fw) { +- memory_region_set_readonly(isa_bios, true); +- } ++ memory_region_set_readonly(isa_bios, !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch b/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch new file mode 100644 index 0000000..6ce9c72 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch @@ -0,0 +1,98 @@ +From 1baf67564d4227d6ba98923217a15814c438c32b Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:06 +0200 +Subject: [PATCH 042/100] hw/i386/x86: Extract x86_isa_bios_init() from + x86_bios_rom_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 
+RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [42/91] 1db417a5995480924f7fd0661a306f2d2bfa0a77 (bonzini/rhel-qemu-kvm) + +The function is inspired by pc_isa_bios_init() and should eventually replace it. +Using x86_isa_bios_init() rather than pc_isa_bios_init() fixes pflash commands +to work in the isa-bios region. + +While at it convert the magic number 0x100000 (== 1MiB) to increase readability. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-6-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 5c5ffec12c30d2017cbdee6798f54d8fad3f9656) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 25 ++++++++++++++++--------- + include/hw/i386/x86.h | 2 ++ + 2 files changed, 18 insertions(+), 9 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 29167de97d..c61f4ebfa6 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1128,12 +1128,25 @@ void x86_load_linux(X86MachineState *x86ms, + nb_option_roms++; + } + ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only) ++{ ++ uint64_t bios_size = memory_region_size(bios); ++ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); ++ ++ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ bios_size - isa_bios_size, isa_bios_size); ++ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, ++ isa_bios, 1); ++ memory_region_set_readonly(isa_bios, read_only); ++} ++ + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) + { + const char *bios_name; + char *filename; +- int bios_size, isa_bios_size; ++ int bios_size; + ssize_t ret; + + /* BIOS load */ +@@ -1171,14 +1184,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + g_free(filename); + + /* map the last 128KB of the BIOS in ISA space */ +- 
isa_bios_size = MIN(bios_size, 128 * KiB); +- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, +- bios_size - isa_bios_size, isa_bios_size); +- memory_region_add_subregion_overlap(rom_memory, +- 0x100000 - isa_bios_size, +- &x86ms->isa_bios, +- 1); +- memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index 55c6809ae0..d7b7d3f3ce 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -129,6 +129,8 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only); + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch deleted file mode 100644 index fe9cd8c..0000000 --- a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 25 Jul 2023 15:34:45 -0300 -Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type - <= pc-q35-rhel9.2.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0 -RH-Bugzilla: 2223691 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 
(LeoBras/centos-qemu-kvm) - -This is a downstream-only patch to that sets off the property -x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing -live migrations to RHEL9.2 happen successfully. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691 -Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine -type < 8.0") -Signed-off-by: Leonardo Bras ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5ea52317b9..6f5117669d 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = { - { "virtio-mem", "x-early-migration", "false" }, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ - { "migration", "x-preempt-pre-7-2", "true" }, -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, - }; - const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch deleted file mode 100644 index 164bea7..0000000 --- a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 2 May 2023 21:27:02 -0300 -Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine - type < 8.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 -RH-Bugzilla: 2189423 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm) - -Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK -set for 
machine types < 8.0 will cause migration to fail if the target -QEMU version is < 8.0.0 : - -qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0 -qemu-system-x86_64: Failed to load PCIDevice:config -qemu-system-x86_64: Failed to load e1000e:parent_obj -qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e' -qemu-system-x86_64: load of migration failed: Invalid argument - -The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0, -with this cmdline: - -./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX] - -In order to fix this, property x-pcie-err-unc-mask was introduced to -control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by -default, but is disabled if machine type <= 7.2. - -Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register") -Suggested-by: Michael S. Tsirkin -Signed-off-by: Leonardo Bras -Message-Id: <20230503002701.854329-1-leobras@redhat.com> -Reviewed-by: Jonathan Cameron -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576 -Tested-by: Fiona Ebner -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f) -Signed-off-by: Leonardo Bras ---- - hw/core/machine.c | 1 + - hw/pci/pci.c | 2 ++ - hw/pci/pcie_aer.c | 11 +++++++---- - include/hw/pci/pci.h | 2 ++ - 4 files changed, 12 insertions(+), 4 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 0e0120b7f2..c28702b690 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = { - { "e1000e", "migrate-timadj", "off" }, - { "virtio-mem", "x-early-migration", "false" }, - { "migration", "x-preempt-pre-7-2", "true" }, -+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, - }; - const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); - -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index def5000e7b..8ad4349e96 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -79,6 +79,8 @@ static Property pci_props[] = { - DEFINE_PROP_STRING("failover_pair_id", PCIDevice, - failover_pair_id), - DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), -+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, -+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true), - DEFINE_PROP_END_OF_LIST() - }; - -diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c -index 103667c368..374d593ead 100644 ---- a/hw/pci/pcie_aer.c -+++ b/hw/pci/pcie_aer.c -@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, - - pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, - PCI_ERR_UNC_SUPPORTED); -- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, -- PCI_ERR_UNC_MASK_DEFAULT); -- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, -- PCI_ERR_UNC_SUPPORTED); -+ -+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { -+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, -+ PCI_ERR_UNC_MASK_DEFAULT); -+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, -+ PCI_ERR_UNC_SUPPORTED); -+ } - - pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, - 
PCI_ERR_UNC_SEVERITY_DEFAULT); -diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h -index d5a40cd058..6dc6742fc4 100644 ---- a/include/hw/pci/pci.h -+++ b/include/hw/pci/pci.h -@@ -207,6 +207,8 @@ enum { - QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), - #define QEMU_PCIE_CXL_BITNR 10 - QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), -+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 -+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), - }; - - typedef struct PCIINTxRoute { --- -2.39.3 - diff --git a/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch b/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch deleted file mode 100644 index 08ee94f..0000000 --- a/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch +++ /dev/null @@ -1,470 +0,0 @@ -From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with - qemu_bh_new_guarded - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit f63192b0544af5d3e4d5edfd85ab520fcf671377 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:09 2023 -0400 - - hw: replace most qemu_bh_new calls with qemu_bh_new_guarded - - This protects devices from bh->mmio reentrancy issues. - - Thanks: Thomas Huth for diagnosing OS X test failure. - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Michael S. 
Tsirkin - Reviewed-by: Paul Durrant - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-5-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/9pfs/xen-9p-backend.c | 5 ++++- - hw/block/dataplane/virtio-blk.c | 3 ++- - hw/block/dataplane/xen-block.c | 5 +++-- - hw/char/virtio-serial-bus.c | 3 ++- - hw/display/qxl.c | 9 ++++++--- - hw/display/virtio-gpu.c | 6 ++++-- - hw/ide/ahci.c | 3 ++- - hw/ide/ahci_internal.h | 1 + - hw/ide/core.c | 4 +++- - hw/misc/imx_rngc.c | 6 ++++-- - hw/misc/macio/mac_dbdma.c | 2 +- - hw/net/virtio-net.c | 3 ++- - hw/nvme/ctrl.c | 6 ++++-- - hw/scsi/mptsas.c | 3 ++- - hw/scsi/scsi-bus.c | 3 ++- - hw/scsi/vmw_pvscsi.c | 3 ++- - hw/usb/dev-uas.c | 3 ++- - hw/usb/hcd-dwc2.c | 3 ++- - hw/usb/hcd-ehci.c | 3 ++- - hw/usb/hcd-uhci.c | 2 +- - hw/usb/host-libusb.c | 6 ++++-- - hw/usb/redirect.c | 6 ++++-- - hw/usb/xen-usb.c | 3 ++- - hw/virtio/virtio-balloon.c | 5 +++-- - hw/virtio/virtio-crypto.c | 3 ++- - 25 files changed, 66 insertions(+), 33 deletions(-) - -diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c -index 74f3a05f88..0e266c552b 100644 ---- a/hw/9pfs/xen-9p-backend.c -+++ b/hw/9pfs/xen-9p-backend.c -@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { - - int num_rings; - Xen9pfsRing *rings; -+ MemReentrancyGuard mem_reentrancy_guard; - } Xen9pfsDev; - - static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); -@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) - xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + - XEN_FLEX_RING_SIZE(ring_order); - -- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); -+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, -+ &xen_9pdev->rings[i], -+ &xen_9pdev->mem_reentrancy_guard); - xen_9pdev->rings[i].out_cons = 0; - xen_9pdev->rings[i].out_size = 0; - xen_9pdev->rings[i].inprogress = false; -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 
b28d81737e..a6202997ee 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - } else { - s->ctx = qemu_get_aio_context(); - } -- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); -+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, -+ &DEVICE(vdev)->mem_reentrancy_guard); - s->batch_notify_vqs = bitmap_new(conf->num_queues); - - *dataplane = s; -diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c -index 734da42ea7..d8bc39d359 100644 ---- a/hw/block/dataplane/xen-block.c -+++ b/hw/block/dataplane/xen-block.c -@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, - } else { - dataplane->ctx = qemu_get_aio_context(); - } -- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, -- dataplane); -+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, -+ dataplane, -+ &DEVICE(xendev)->mem_reentrancy_guard); - - return dataplane; - } -diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c -index 7d4601cb5d..dd619f0731 100644 ---- a/hw/char/virtio-serial-bus.c -+++ b/hw/char/virtio-serial-bus.c -@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) - return; - } - -- port->bh = qemu_bh_new(flush_queued_data_bh, port); -+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, -+ &dev->mem_reentrancy_guard); - port->elem = NULL; - } - -diff --git a/hw/display/qxl.c b/hw/display/qxl.c -index 80ce1e9a93..f1c0eb7dfc 100644 ---- a/hw/display/qxl.c -+++ b/hw/display/qxl.c -@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) - - qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); - -- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl); -+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, -+ &DEVICE(qxl)->mem_reentrancy_guard); - 
qxl_reset_state(qxl); - -- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); -- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); -+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, -+ &DEVICE(qxl)->mem_reentrancy_guard); -+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, -+ &DEVICE(qxl)->mem_reentrancy_guard); - } - - static void qxl_realize_primary(PCIDevice *dev, Error **errp) -diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c -index 5e15c79b94..66ac9b6cc5 100644 ---- a/hw/display/virtio-gpu.c -+++ b/hw/display/virtio-gpu.c -@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) - - g->ctrl_vq = virtio_get_queue(vdev, 0); - g->cursor_vq = virtio_get_queue(vdev, 1); -- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); -- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); -+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, -+ &qdev->mem_reentrancy_guard); -+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, -+ &qdev->mem_reentrancy_guard); - QTAILQ_INIT(&g->reslist); - QTAILQ_INIT(&g->cmdq); - QTAILQ_INIT(&g->fenceq); -diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c -index 55902e1df7..4e76d6b191 100644 ---- a/hw/ide/ahci.c -+++ b/hw/ide/ahci.c -@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) - ahci_write_fis_d2h(ad); - - if (ad->port_regs.cmd_issue && !ad->check_bh) { -- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); -+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, -+ &ad->mem_reentrancy_guard); - qemu_bh_schedule(ad->check_bh); - } - } -diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h -index 303fcd7235..2480455372 100644 ---- a/hw/ide/ahci_internal.h -+++ b/hw/ide/ahci_internal.h -@@ -321,6 +321,7 @@ struct AHCIDevice { - bool init_d2h_sent; - AHCICmdHdr *cur_cmd; - NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; -+ MemReentrancyGuard mem_reentrancy_guard; - }; - - 
struct AHCIPCIState { -diff --git a/hw/ide/core.c b/hw/ide/core.c -index 45d14a25e9..de48ff9f86 100644 ---- a/hw/ide/core.c -+++ b/hw/ide/core.c -@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( - BlockCompletionFunc *cb, void *cb_opaque, void *opaque) - { - IDEState *s = opaque; -+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; - TrimAIOCB *iocb; - - /* Paired with a decrement in ide_trim_bh_cb() */ -@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( - - iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); - iocb->s = s; -- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); -+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, -+ &DEVICE(dev)->mem_reentrancy_guard); - iocb->ret = 0; - iocb->qiov = qiov; - iocb->i = -1; -diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c -index 632c03779c..082c6980ad 100644 ---- a/hw/misc/imx_rngc.c -+++ b/hw/misc/imx_rngc.c -@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) - sysbus_init_mmio(sbd, &s->iomem); - - sysbus_init_irq(sbd, &s->irq); -- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); -- s->seed_bh = qemu_bh_new(imx_rngc_seed, s); -+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, -+ &dev->mem_reentrancy_guard); -+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, -+ &dev->mem_reentrancy_guard); - } - - static void imx_rngc_reset(DeviceState *dev) -diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c -index 43bb1f56ba..80a789f32b 100644 ---- a/hw/misc/macio/mac_dbdma.c -+++ b/hw/misc/macio/mac_dbdma.c -@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) - { - DBDMAState *s = MAC_DBDMA(dev); - -- s->bh = qemu_bh_new(DBDMA_run_bh, s); -+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); - } - - static void mac_dbdma_class_init(ObjectClass *oc, void *data) -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 53e1c32643..447f669921 100644 ---- a/hw/net/virtio-net.c -+++ 
b/hw/net/virtio-net.c -@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) - n->vqs[index].tx_vq = - virtio_add_queue(vdev, n->net_conf.tx_queue_size, - virtio_net_handle_tx_bh); -- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); -+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], -+ &DEVICE(vdev)->mem_reentrancy_guard); - } - - n->vqs[index].tx_waiting = 0; -diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c -index ac24eeb5ed..e5a468975e 100644 ---- a/hw/nvme/ctrl.c -+++ b/hw/nvme/ctrl.c -@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, - QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); - } - -- sq->bh = qemu_bh_new(nvme_process_sq, sq); -+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, -+ &DEVICE(sq->ctrl)->mem_reentrancy_guard); - - if (n->dbbuf_enabled) { - sq->db_addr = n->dbbuf_dbs + (sqid << 3); -@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, - } - } - n->cq[cqid] = cq; -- cq->bh = qemu_bh_new(nvme_post_cqes, cq); -+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, -+ &DEVICE(cq->ctrl)->mem_reentrancy_guard); - } - - static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) -diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c -index c485da792c..3de288b454 100644 ---- a/hw/scsi/mptsas.c -+++ b/hw/scsi/mptsas.c -@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) - } - s->max_devices = MPTSAS_NUM_PORTS; - -- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); -+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, -+ &DEVICE(dev)->mem_reentrancy_guard); - - scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); - } -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index c97176110c..3c20b47ad0 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool 
running, RunState state) - AioContext *ctx = blk_get_aio_context(s->conf.blk); - /* The reference is dropped in scsi_dma_restart_bh.*/ - object_ref(OBJECT(s)); -- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); -+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, -+ &DEVICE(s)->mem_reentrancy_guard); - qemu_bh_schedule(s->bh); - } - } -diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c -index fa76696855..4de34536e9 100644 ---- a/hw/scsi/vmw_pvscsi.c -+++ b/hw/scsi/vmw_pvscsi.c -@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) - pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); - } - -- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); -+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, -+ &DEVICE(pci_dev)->mem_reentrancy_guard); - - scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); - /* override default SCSI bus hotplug-handler, with pvscsi's one */ -diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c -index 88f99c05d5..f013ded91e 100644 ---- a/hw/usb/dev-uas.c -+++ b/hw/usb/dev-uas.c -@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) - - QTAILQ_INIT(&uas->results); - QTAILQ_INIT(&uas->requests); -- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); -+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, -+ &d->mem_reentrancy_guard); - - dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); - scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); -diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c -index 8755e9cbb0..a0c4e782b2 100644 ---- a/hw/usb/hcd-dwc2.c -+++ b/hw/usb/hcd-dwc2.c -@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) - s->fi = USB_FRMINTVL - 1; - s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); -- s->async_bh = qemu_bh_new(dwc2_work_bh, s); -+ 
s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, -+ &dev->mem_reentrancy_guard); - - sysbus_init_irq(sbd, &s->irq); - } -diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c -index d4da8dcb8d..c930c60921 100644 ---- a/hw/usb/hcd-ehci.c -+++ b/hw/usb/hcd-ehci.c -@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) - } - - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); -- s->async_bh = qemu_bh_new(ehci_work_bh, s); -+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, -+ &dev->mem_reentrancy_guard); - s->device = dev; - - s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); -diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 8ac1175ad2..77baaa7a6b 100644 ---- a/hw/usb/hcd-uhci.c -+++ b/hw/usb/hcd-uhci.c -@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) - USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); - } - } -- s->bh = qemu_bh_new(uhci_bh, s); -+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); - s->num_ports_vmstate = NB_PORTS; - QTAILQ_INIT(&s->queues); -diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c -index 176868d345..f500db85ab 100644 ---- a/hw/usb/host-libusb.c -+++ b/hw/usb/host-libusb.c -@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) - static void usb_host_nodev(USBHostDevice *s) - { - if (!s->bh_nodev) { -- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); -+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, -+ &DEVICE(s)->mem_reentrancy_guard); - } - qemu_bh_schedule(s->bh_nodev); - } -@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) - USBHostDevice *dev = opaque; - - if (!dev->bh_postld) { -- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); -+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); - } - 
qemu_bh_schedule(dev->bh_postld); - dev->bh_postld_pending = true; -diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c -index fd7df599bc..39fbaaab16 100644 ---- a/hw/usb/redirect.c -+++ b/hw/usb/redirect.c -@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) - } - } - -- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); -- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); -+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); -+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); - dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); - - packet_id_queue_init(&dev->cancelled, dev, "cancelled"); -diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c -index 66cb3f7c24..38ee660a30 100644 ---- a/hw/usb/xen-usb.c -+++ b/hw/usb/xen-usb.c -@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) - - QTAILQ_INIT(&usbif->req_free_q); - QSIMPLEQ_INIT(&usbif->hotplug_q); -- usbif->bh = qemu_bh_new(usbback_bh, usbif); -+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, -+ &DEVICE(xendev)->mem_reentrancy_guard); - } - - static int usbback_free(struct XenLegacyDevice *xendev) -diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c -index 43092aa634..5186e831dd 100644 ---- a/hw/virtio/virtio-balloon.c -+++ b/hw/virtio/virtio-balloon.c -@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) - precopy_add_notifier(&s->free_page_hint_notify); - - object_ref(OBJECT(s->iothread)); -- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), -- virtio_ballloon_get_free_page_hints, s); -+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), -+ virtio_ballloon_get_free_page_hints, s, -+ &dev->mem_reentrancy_guard); - } - - if (virtio_has_feature(s->host_features, 
VIRTIO_BALLOON_F_REPORTING)) { -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index 802e1b9659..2fe804510f 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) - vcrypto->vqs[i].dataq = - virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); - vcrypto->vqs[i].dataq_bh = -- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); -+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], -+ &dev->mem_reentrancy_guard); - vcrypto->vqs[i].vcrypto = vcrypto; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch b/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch deleted file mode 100644 index efa966e..0000000 --- a/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 29 May 2023 14:21:08 -0400 -Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI - controller (CVE-2023-0330) - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit b987718bbb1d0eabf95499b976212dd5f0120d75 -Author: Thomas Huth -Date: Mon May 22 11:10:11 2023 +0200 - - hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330) - - We cannot use the generic reentrancy guard in the LSI code, so - we have to manually prevent endless reentrancy here. 
The problematic - lsi_execute_script() function has already a way to detect whether - too many instructions have been executed - we just have to slightly - change the logic here that it also takes into account if the function - has been called too often in a reentrant way. - - The code in fuzz-lsi53c895a-test.c has been taken from an earlier - patch by Mauro Matteo Cascella. - - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563 - Message-Id: <20230522091011.1082574-1-thuth@redhat.com> - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Alexander Bulekov - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 23 +++++++++++++++------ - tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++ - 2 files changed, 50 insertions(+), 6 deletions(-) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index 048436352b..f7d45b0b20 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s) - uint32_t addr, addr_high; - int opcode; - int insn_processed = 0; -+ static int reentrancy_level; -+ -+ reentrancy_level++; - - s->istat1 |= LSI_ISTAT1_SRUN; - again: -- if (++insn_processed > LSI_MAX_INSN) { -- /* Some windows drivers make the device spin waiting for a memory -- location to change. If we have been executed a lot of code then -- assume this is the case and force an unexpected device disconnect. -- This is apparently sufficient to beat the drivers into submission. -- */ -+ /* -+ * Some windows drivers make the device spin waiting for a memory location -+ * to change. If we have executed more than LSI_MAX_INSN instructions then -+ * assume this is the case and force an unexpected device disconnect. This -+ * is apparently sufficient to beat the drivers into submission. -+ * -+ * Another issue (CVE-2023-0330) can occur if the script is programmed to -+ * trigger itself again and again. 
Avoid this problem by stopping after -+ * being called multiple times in a reentrant way (8 is an arbitrary value -+ * which should be enough for all valid use cases). -+ */ -+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) { - if (!(s->sien0 & LSI_SIST0_UDC)) { - qemu_log_mask(LOG_GUEST_ERROR, - "lsi_scsi: inf. loop with UDC masked"); -@@ -1596,6 +1605,8 @@ again: - } - } - trace_lsi_execute_script_stop(); -+ -+ reentrancy_level--; - } - - static uint8_t lsi_reg_readb(LSIState *s, int offset) -diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c -index 2012bd54b7..1b55928b9f 100644 ---- a/tests/qtest/fuzz-lsi53c895a-test.c -+++ b/tests/qtest/fuzz-lsi53c895a-test.c -@@ -8,6 +8,36 @@ - #include "qemu/osdep.h" - #include "libqtest.h" - -+/* -+ * This used to trigger a DMA reentrancy issue -+ * leading to memory corruption bugs like stack -+ * overflow or use-after-free -+ * https://gitlab.com/qemu-project/qemu/-/issues/1563 -+ */ -+static void test_lsi_dma_reentrancy(void) -+{ -+ QTestState *s; -+ -+ s = qtest_init("-M q35 -m 512M -nodefaults " -+ "-blockdev driver=null-co,node-name=null0 " -+ "-device lsi53c810 -device scsi-cd,drive=null0"); -+ -+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */ -+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */ -+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */ -+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/ -+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */ -+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/ -+ qtest_writel(s, 0xff000000, 0xc0000024); -+ qtest_writel(s, 0xff000114, 0x00000080); -+ qtest_writel(s, 0xff00012c, 0xff000000); -+ qtest_writel(s, 0xff000004, 0xff000114); -+ qtest_writel(s, 0xff000008, 0xff100014); -+ qtest_writel(s, 0xff10002f, 0x000000ff); -+ -+ qtest_quit(s); -+} -+ - /* - * This used to trigger a UAF in lsi_do_msgout() - * https://gitlab.com/qemu-project/qemu/-/issues/972 -@@ -124,5 +154,8 @@ int main(int argc, char 
**argv) - qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req", - test_lsi_do_msgout_cancel_req); - -+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy", -+ test_lsi_dma_reentrancy); -+ - return g_test_run(); - } --- -2.39.3 - diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch deleted file mode 100644 index ffabd75..0000000 --- a/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch +++ /dev/null @@ -1,76 +0,0 @@ -From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0ddcb39c9357 -Author: Alex Williamson -Date: Fri Jun 30 16:36:08 2023 -0600 - - hw/vfio/pci-quirks: Sanitize capability pointer - - Coverity reports a tained scalar when traversing the capabilities - chain (CID 1516589). In practice I've never seen a device with a - chain so broken as to cause an issue, but it's also pretty easy to - sanitize. 
- - Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques") - Signed-off-by: Alex Williamson - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci-quirks.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 0ed2fcd531..f4ff836805 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { - .set = set_nv_gpudirect_clique_id, - }; - -+static bool is_valid_std_cap_offset(uint8_t pos) -+{ -+ return (pos >= PCI_STD_HEADER_SIZEOF && -+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF)); -+} -+ - static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - { - PCIDevice *pdev = &vdev->pdev; -@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - */ - ret = pread(vdev->vbasedev.fd, &tmp, 1, - vdev->config_offset + PCI_CAPABILITY_LIST); -- if (ret != 1 || !tmp) { -+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) { - error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); - return -EINVAL; - } -@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - d4_conflict = true; - } - tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; -- } while (tmp); -+ } while (is_valid_std_cap_offset(tmp)); - - if (!c8_conflict) { - pos = 0xC8; --- -2.39.3 - diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch deleted file mode 100644 index 99f5c75..0000000 --- a/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch +++ /dev/null @@ -1,110 +0,0 @@ -From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 
18/37] hw/vfio/pci-quirks: Support alternate offset for - GPUDirect Cliques -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit f6b30c1984f7 -Author: Alex Williamson -Date: Thu Jun 8 12:05:07 2023 -0600 - - hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques - - NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset - previously reserved for use by hypervisors to implement the GPUDirect - Cliques capability. A revised specification provides an alternate - location. Add a config space walk to the quirk to check for conflicts, - allowing us to fall back to the new location or generate an error at the - quirk setup rather than when the real conflicting capability is added - should there be no available location. 
- - Signed-off-by: Alex Williamson - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 40 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index f0147a050a..0ed2fcd531 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) - * +---------------------------------+---------------------------------+ - * - * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf -+ * -+ * Specification for Turning and later GPU architectures: -+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf - */ - static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v, - const char *name, void *opaque, -@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { - static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - { - PCIDevice *pdev = &vdev->pdev; -- int ret, pos = 0xC8; -+ int ret, pos; -+ bool c8_conflict = false, d4_conflict = false; -+ uint8_t tmp; - - if (vdev->nv_gpudirect_clique == 0xFF) { - return 0; -@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - return -EINVAL; - } - -+ /* -+ * Per the updated specification above, it's recommended to use offset -+ * D4h for Turing and later GPU architectures due to a conflict of the -+ * MSI-X capability at C8h. We don't know how to determine the GPU -+ * architecture, instead we walk the capability chain to mark conflicts -+ * and choose one or error based on the result. -+ * -+ * NB. Cap list head in pdev->config is already cleared, read from device. 
-+ */ -+ ret = pread(vdev->vbasedev.fd, &tmp, 1, -+ vdev->config_offset + PCI_CAPABILITY_LIST); -+ if (ret != 1 || !tmp) { -+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); -+ return -EINVAL; -+ } -+ -+ do { -+ if (tmp == 0xC8) { -+ c8_conflict = true; -+ } else if (tmp == 0xD4) { -+ d4_conflict = true; -+ } -+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; -+ } while (tmp); -+ -+ if (!c8_conflict) { -+ pos = 0xC8; -+ } else if (!d4_conflict) { -+ pos = 0xD4; -+ } else { -+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space"); -+ return -EINVAL; -+ } -+ - ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp); - if (ret < 0) { - error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: "); --- -2.39.3 - diff --git a/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch b/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch new file mode 100644 index 0000000..7b2e1b6 --- /dev/null +++ b/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch @@ -0,0 +1,108 @@ +From c554f8768a18ceba173aedbd582c1cae43a41e2c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 18 Jun 2024 14:19:58 +0200 +Subject: [PATCH 1/2] hw/virtio: Fix the de-initialization of vhost-user + devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 255: hw/virtio: Fix the de-initialization of vhost-user devices +RH-Jira: RHEL-40708 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] c7815a249ec135993f45934cab1c1f2c038b80ea (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-40708 + +The unrealize functions of the various vhost-user devices are +calling the corresponding vhost_*_set_status() functions with a +status of 0 to shut down the device correctly. 
+ +Now these vhost_*_set_status() functions all follow this scheme: + + bool should_start = virtio_device_should_start(vdev, status); + + if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { + return; + } + + if (should_start) { + /* ... do the initialization stuff ... */ + } else { + /* ... do the cleanup stuff ... */ + } + +The problem here is virtio_device_should_start(vdev, 0) currently +always returns "true" since it internally only looks at vdev->started +instead of looking at the "status" parameter. Thus once the device +got started once, virtio_device_should_start() always returns true +and thus the vhost_*_set_status() functions return early, without +ever doing any clean-up when being called with status == 0. This +causes e.g. problems when trying to hot-plug and hot-unplug a vhost +user devices multiple times since the de-initialization step is +completely skipped during the unplug operation. + +This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move +vm_running check to virtio_device_started") which replaced + + should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + +with + + should_start = virtio_device_started(vdev, status); + +which later got replaced by virtio_device_should_start(). This blocked +the possibility to set should_start to false in case the status flag +VIRTIO_CONFIG_S_DRIVER_OK was not set. + +Fix it by adjusting the virtio_device_should_start() function to +only consider the status flag instead of vdev->started. Since this +function is only used in the various vhost_*_set_status() functions +for exactly the same purpose, it should be fine to fix it in this +central place there without any risk to change the behavior of other +code. + +Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started") +Buglink: https://issues.redhat.com/browse/RHEL-40708 +Signed-off-by: Thomas Huth +Message-Id: <20240618121958.88673-1-thuth@redhat.com> +Reviewed-by: Manos Pitsidianakis +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16) +Signed-off-by: Thomas Huth +--- + include/hw/virtio/virtio.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 7d5ffdc145..2eafad17b8 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status) + * @vdev - the VirtIO device + * @status - the devices status bits + * +- * This is similar to virtio_device_started() but also encapsulates a +- * check on the VM status which would prevent a device starting +- * anyway. ++ * This is similar to virtio_device_started() but ignores vdev->started ++ * and also encapsulates a check on the VM status which would prevent a ++ * device from starting anyway. + */ + static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status) + { +@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status + return false; + } + +- return virtio_device_started(vdev, status); ++ return status & VIRTIO_CONFIG_S_DRIVER_OK; + } + + static inline void virtio_set_started(VirtIODevice *vdev, bool started) +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch b/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch deleted file mode 100644 index 7a5963c..0000000 --- a/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 17 Jul 2023 18:21:26 +0200 -Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in - virtio_iommu_handle_command() - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas 
Huth -RH-Acked-by: Peter Xu -RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -In the virtio_iommu_handle_command() when a PROBE request is handled, -output_size takes a value greater than the tail size and on a subsequent -iteration we can get a stack out-of-band access. Initialize the -output_size on each iteration. - -The issue was found with ASAN. Credits to: -Yiming Tao(Zhejiang University) -Gaoning Pan(Zhejiang University) - -Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request") -Signed-off-by: Eric Auger -Reported-by: Mauro Matteo Cascella -Cc: qemu-stable@nongnu.org - -Message-Id: <20230717162126.11693-1-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 421e2a944f..17ce630200 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) - VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); - struct virtio_iommu_req_head head; - struct virtio_iommu_req_tail tail = {}; -- size_t output_size = sizeof(tail), sz; - VirtQueueElement *elem; - unsigned int iov_cnt; - struct iovec *iov; - void *buf = NULL; -+ size_t sz; - - for (;;) { -+ size_t output_size = sizeof(tail); -+ - elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); - if (!elem) { - return; --- -2.39.3 - diff --git a/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch b/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch new file mode 100644 index 0000000..8f69f9e --- /dev/null +++ b/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch @@ -0,0 
+1,68 @@ +From f572a40924c7138072e387111d0f092185972477 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:39 +0200 +Subject: [PATCH 044/100] i386: correctly select code in hw/i386 that depends + on other components + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [44/91] 1327a5eb2b91edacf56cc4e93255cad456abbbeb (bonzini/rhel-qemu-kvm) + +fw_cfg.c and vapic.c are currently included unconditionally but +depend on other components. vapic.c depends on the local APIC, +while fw_cfg.c includes a piece of AML builder code that depends +on CONFIG_ACPI. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-9-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7974e51342775c87f6e759a8c525db1045ddfa24) +Signed-off-by: Paolo Bonzini +--- + hw/i386/fw_cfg.c | 2 ++ + hw/i386/meson.build | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 283c3f4c16..7f97d40616 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -204,6 +204,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) + fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); + } + ++#ifdef CONFIG_ACPI + void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) + { + /* +@@ -230,3 +231,4 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } ++#endif +diff --git a/hw/i386/meson.build b/hw/i386/meson.build +index d8b70ef3e9..d9da676038 100644 +--- a/hw/i386/meson.build ++++ b/hw/i386/meson.build +@@ -1,12 +1,12 @@ + i386_ss = ss.source_set() + i386_ss.add(files( + 'fw_cfg.c', +- 'vapic.c', + 'e820_memory_layout.c', + 'multiboot.c', + 'x86.c', + )) + ++i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c')) + 
i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), + if_false: files('x86-iommu-stub.c')) + i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch b/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch new file mode 100644 index 0000000..31a7e92 --- /dev/null +++ b/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch @@ -0,0 +1,40 @@ +From 127f3c60668e1bd08ec00856a317cb841adf0440 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:23 -0500 +Subject: [PATCH 063/100] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [63/91] 0f834a6897c5cdc0e29a5b1862e621f8ce309657 (bonzini/rhel-qemu-kvm) + +SNP guests will rely on this bit to determine certain feature support. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7831221941cccbde922412c1550ed8b4bce7c361) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 489c853b42..13737cd703 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6822,6 +6822,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; ++ *eax |= sev_snp_enabled() ? 
0x10 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch b/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch deleted file mode 100644 index 3ee6b29..0000000 --- a/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch +++ /dev/null @@ -1,52 +0,0 @@ -From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:29:15 -0400 -Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID - 0x8000001F is set - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8 -Author: Tom Lendacky -Date: Fri Sep 30 10:14:30 2022 -0500 - - i386/cpu: Update how the EBX register of CPUID 0x8000001F is set - - Update the setting of CPUID 0x8000001F EBX to clearly document the ranges - associated with fields being set. - - Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. David Alan Gilbert - Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 839706b430..4ac3046313 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - if (sev_enabled()) { - *eax = 0x2; - *eax |= sev_es_enabled() ? 
0x8 : 0; -- *ebx = sev_get_cbit_position(); -- *ebx |= sev_get_reduced_phys_bits() << 6; -+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ -+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; - default: --- -2.39.3 - diff --git a/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch b/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch new file mode 100644 index 0000000..fd604d2 --- /dev/null +++ b/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch @@ -0,0 +1,145 @@ +From 14aa42bbacde75b2ce9a59d1267f73d613026461 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:42 -0500 +Subject: [PATCH 076/100] i386/kvm: Add KVM_EXIT_HYPERCALL handling for + KVM_HC_MAP_GPA_RANGE + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [76/91] 3e1201c330dc826af1ec4650974d47053270eb16 (bonzini/rhel-qemu-kvm) + +KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for +private/shared memory attribute updates requested by the guest. +Implement handling for that use-case along with some basic +infrastructure for enabling specific hypercall events. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 47e76d03b155e43beca550251a6eb7ea926c059f) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 55 ++++++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 1 + + target/i386/kvm/trace-events | 1 + + 3 files changed, 57 insertions(+) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 75e75d9772..2935e3931a 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -21,6 +21,7 @@ + #include + + #include ++#include + #include "standard-headers/asm-x86/kvm_para.h" + #include "hw/xen/interface/arch-x86/cpuid.h" + +@@ -208,6 +209,13 @@ int kvm_get_vm_type(MachineState *ms) + return kvm_type; + } + ++bool kvm_enable_hypercall(uint64_t enable_mask) ++{ ++ KVMState *s = KVM_STATE(current_accel()); ++ ++ return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask); ++} ++ + bool kvm_has_smm(void) + { + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +@@ -5325,6 +5333,50 @@ static bool host_supports_vmx(void) + return ecx & CPUID_EXT_VMX; + } + ++/* ++ * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE ++ * to service guest-initiated memory attribute update requests so that ++ * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be ++ * backed by the private memory pool provided by guest_memfd, and as such ++ * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX). ++ * ++ * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live ++ * migration, are not implemented here currently. ++ * ++ * For the guest_memfd use-case, these exits will generally be synthesized ++ * by KVM based on platform-specific hypercalls, like GHCB requests in the ++ * case of SEV-SNP, and not issued directly within the guest though the ++ * KVM_HC_MAP_GPA_RANGE hypercall. 
So in this case, KVM_HC_MAP_GPA_RANGE is ++ * not actually advertised to guests via the KVM CPUID feature bit, as ++ * opposed to SEV live migration where it would be. Since it is unlikely the ++ * SEV live migration use-case would be useful for guest-memfd backed guests, ++ * because private/shared page tracking is already provided through other ++ * means, these 2 use-cases should be treated as being mutually-exclusive. ++ */ ++static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) ++{ ++ uint64_t gpa, size, attributes; ++ ++ if (!machine_require_guest_memfd(current_machine)) ++ return -EINVAL; ++ ++ gpa = run->hypercall.args[0]; ++ size = run->hypercall.args[1] * TARGET_PAGE_SIZE; ++ attributes = run->hypercall.args[2]; ++ ++ trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); ++ ++ return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); ++} ++ ++static int kvm_handle_hypercall(struct kvm_run *run) ++{ ++ if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) ++ return kvm_handle_hc_map_gpa_range(run); ++ ++ return -EINVAL; ++} ++ + #define VMX_INVALID_GUEST_STATE 0x80000021 + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) +@@ -5420,6 +5472,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_xen_handle_exit(cpu, &run->xen); + break; + #endif ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_handle_hypercall(run); ++ break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; +diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index 6b44844d95..34fc60774b 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -33,6 +33,7 @@ + bool kvm_has_smm(void); + bool kvm_enable_x2apic(void); + bool kvm_hv_vpindex_settable(void); ++bool kvm_enable_hypercall(uint64_t enable_mask); + + bool kvm_enable_sgx_provisioning(KVMState *s); + bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); +diff --git 
a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events +index b365a8e8e2..74a6234ff7 100644 +--- a/target/i386/kvm/trace-events ++++ b/target/i386/kvm/trace-events +@@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" + kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" + kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" + kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" ++kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64 + + # xen-emu.c + kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch b/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch new file mode 100644 index 0000000..4b91e93 --- /dev/null +++ b/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch @@ -0,0 +1,536 @@ +From 5ead79f45e8e90b7a04586c89e70cb9d0b66b730 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 29 Feb 2024 01:36:43 -0500 +Subject: [PATCH 004/100] i386/kvm: Move architectural CPUID leaf generation to + separate helper + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [4/91] 06ecdbcf05ad3d658273980b114f02477d0b0475 (bonzini/rhel-qemu-kvm) + +Move the architectural (for lack of a better term) CPUID leaf generation +to a separate helper so that the generation code can be reused by TDX, +which needs to generate a canonical VM-scoped configuration. + +For now this is just a cleanup, so keep the function static. 
+ +Signed-off-by: Sean Christopherson +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com> +Reviewed-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 417 +++++++++++++++++++++--------------------- + 1 file changed, 211 insertions(+), 206 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 739f33db47..5f30b649a0 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1706,195 +1706,22 @@ static void kvm_init_nested_state(CPUX86State *env) + } + } + +-int kvm_arch_init_vcpu(CPUState *cs) ++static uint32_t kvm_x86_build_cpuid(CPUX86State *env, ++ struct kvm_cpuid_entry2 *entries, ++ uint32_t cpuid_i) + { +- struct { +- struct kvm_cpuid2 cpuid; +- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +- } cpuid_data; +- /* +- * The kernel defines these structs with padding fields so there +- * should be no extra padding in our cpuid_data struct. +- */ +- QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != +- sizeof(struct kvm_cpuid2) + +- sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); +- +- X86CPU *cpu = X86_CPU(cs); +- CPUX86State *env = &cpu->env; +- uint32_t limit, i, j, cpuid_i; ++ uint32_t limit, i, j; + uint32_t unused; + struct kvm_cpuid_entry2 *c; +- uint32_t signature[3]; +- int kvm_base = KVM_CPUID_SIGNATURE; +- int max_nested_state_len; +- int r; +- Error *local_err = NULL; +- +- memset(&cpuid_data, 0, sizeof(cpuid_data)); +- +- cpuid_i = 0; +- +- has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); +- +- r = kvm_arch_set_tsc_khz(cs); +- if (r < 0) { +- return r; +- } +- +- /* vcpu's TSC frequency is either specified by user, or following +- * the value used by KVM if the former is not present. In the +- * latter case, we query it from KVM and record in env->tsc_khz, +- * so that vcpu's TSC frequency can be migrated later via this field. 
+- */ +- if (!env->tsc_khz) { +- r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? +- kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : +- -ENOTSUP; +- if (r > 0) { +- env->tsc_khz = r; +- } +- } +- +- env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; +- +- /* +- * kvm_hyperv_expand_features() is called here for the second time in case +- * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle +- * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to +- * check which Hyper-V enlightenments are supported and which are not, we +- * can still proceed and check/expand Hyper-V enlightenments here so legacy +- * behavior is preserved. +- */ +- if (!kvm_hyperv_expand_features(cpu, &local_err)) { +- error_report_err(local_err); +- return -ENOSYS; +- } +- +- if (hyperv_enabled(cpu)) { +- r = hyperv_init_vcpu(cpu); +- if (r) { +- return r; +- } +- +- cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); +- kvm_base = KVM_CPUID_SIGNATURE_NEXT; +- has_msr_hv_hypercall = true; +- } +- +- if (cs->kvm_state->xen_version) { +-#ifdef CONFIG_XEN_EMU +- struct kvm_cpuid_entry2 *xen_max_leaf; +- +- memcpy(signature, "XenVMMXenVMM", 12); +- +- xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_SIGNATURE; +- c->eax = kvm_base + XEN_CPUID_TIME; +- c->ebx = signature[0]; +- c->ecx = signature[1]; +- c->edx = signature[2]; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_VENDOR; +- c->eax = cs->kvm_state->xen_version; +- c->ebx = 0; +- c->ecx = 0; +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_HVM_MSR; +- /* Number of hypercall-transfer pages */ +- c->eax = 1; +- /* Hypercall MSR base address */ +- if (hyperv_enabled(cpu)) { +- c->ebx = XEN_HYPERCALL_MSR_HYPERV; +- kvm_xen_init(cs->kvm_state, c->ebx); +- } else { +- c->ebx = XEN_HYPERCALL_MSR; +- } +- c->ecx = 0; +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = 
kvm_base + XEN_CPUID_TIME; +- c->eax = ((!!tsc_is_stable_and_known(env) << 1) | +- (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); +- /* default=0 (emulate if necessary) */ +- c->ebx = 0; +- /* guest tsc frequency */ +- c->ecx = env->user_tsc_khz; +- /* guest tsc incarnation (migration count) */ +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_HVM; +- xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { +- c->function = kvm_base + XEN_CPUID_HVM; +- +- if (cpu->xen_vapic) { +- c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; +- c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; +- } +- +- c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; +- +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { +- c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; +- c->ebx = cs->cpu_index; +- } +- +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { +- c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; +- } +- } +- +- r = kvm_xen_init_vcpu(cs); +- if (r) { +- return r; +- } +- +- kvm_base += 0x100; +-#else /* CONFIG_XEN_EMU */ +- /* This should never happen as kvm_arch_init() would have died first. 
*/ +- fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); +- abort(); +-#endif +- } else if (cpu->expose_kvm) { +- memcpy(signature, "KVMKVMKVM\0\0\0", 12); +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = KVM_CPUID_SIGNATURE | kvm_base; +- c->eax = KVM_CPUID_FEATURES | kvm_base; +- c->ebx = signature[0]; +- c->ecx = signature[1]; +- c->edx = signature[2]; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = KVM_CPUID_FEATURES | kvm_base; +- c->eax = env->features[FEAT_KVM]; +- c->edx = env->features[FEAT_KVM_HINTS]; +- } + + cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); + +- if (cpu->kvm_pv_enforce_cpuid) { +- r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); +- if (r < 0) { +- fprintf(stderr, +- "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", +- strerror(-r)); +- abort(); +- } +- } +- + for (i = 0; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported level value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; +- ++ c = &entries[cpuid_i++]; + switch (i) { + case 2: { + /* Keep reading function 2 till all the input is received */ +@@ -1908,11 +1735,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + + for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:2):eax & 0xf = 0x%x\n", times); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + c->function = i; + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); +@@ -1951,11 +1776,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + continue; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + case 
0x12: +@@ -1970,11 +1793,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x12,ecx:0x%x)\n", j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + case 0x7: +@@ -1991,11 +1812,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + + for (j = 1; j <= times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + c->function = i; + c->index = j; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; +@@ -2048,11 +1867,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0x80000000; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + + switch (i) { + case 0x8000001d: +@@ -2067,11 +1886,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + break; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + default: +@@ -2094,11 +1911,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0xC0000000; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + + c->function = i; + c->flags = 0; +@@ -2106,6 +1923,194 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + } + 
++ return cpuid_i; ++ ++full: ++ fprintf(stderr, "cpuid_data is full, no space for " ++ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); ++ abort(); ++} ++ ++int kvm_arch_init_vcpu(CPUState *cs) ++{ ++ struct { ++ struct kvm_cpuid2 cpuid; ++ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; ++ } cpuid_data; ++ /* ++ * The kernel defines these structs with padding fields so there ++ * should be no extra padding in our cpuid_data struct. ++ */ ++ QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != ++ sizeof(struct kvm_cpuid2) + ++ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); ++ ++ X86CPU *cpu = X86_CPU(cs); ++ CPUX86State *env = &cpu->env; ++ uint32_t cpuid_i; ++ struct kvm_cpuid_entry2 *c; ++ uint32_t signature[3]; ++ int kvm_base = KVM_CPUID_SIGNATURE; ++ int max_nested_state_len; ++ int r; ++ Error *local_err = NULL; ++ ++ memset(&cpuid_data, 0, sizeof(cpuid_data)); ++ ++ cpuid_i = 0; ++ ++ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); ++ ++ r = kvm_arch_set_tsc_khz(cs); ++ if (r < 0) { ++ return r; ++ } ++ ++ /* vcpu's TSC frequency is either specified by user, or following ++ * the value used by KVM if the former is not present. In the ++ * latter case, we query it from KVM and record in env->tsc_khz, ++ * so that vcpu's TSC frequency can be migrated later via this field. ++ */ ++ if (!env->tsc_khz) { ++ r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? ++ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : ++ -ENOTSUP; ++ if (r > 0) { ++ env->tsc_khz = r; ++ } ++ } ++ ++ env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; ++ ++ /* ++ * kvm_hyperv_expand_features() is called here for the second time in case ++ * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle ++ * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to ++ * check which Hyper-V enlightenments are supported and which are not, we ++ * can still proceed and check/expand Hyper-V enlightenments here so legacy ++ * behavior is preserved. 
++ */ ++ if (!kvm_hyperv_expand_features(cpu, &local_err)) { ++ error_report_err(local_err); ++ return -ENOSYS; ++ } ++ ++ if (hyperv_enabled(cpu)) { ++ r = hyperv_init_vcpu(cpu); ++ if (r) { ++ return r; ++ } ++ ++ cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); ++ kvm_base = KVM_CPUID_SIGNATURE_NEXT; ++ has_msr_hv_hypercall = true; ++ } ++ ++ if (cs->kvm_state->xen_version) { ++#ifdef CONFIG_XEN_EMU ++ struct kvm_cpuid_entry2 *xen_max_leaf; ++ ++ memcpy(signature, "XenVMMXenVMM", 12); ++ ++ xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_SIGNATURE; ++ c->eax = kvm_base + XEN_CPUID_TIME; ++ c->ebx = signature[0]; ++ c->ecx = signature[1]; ++ c->edx = signature[2]; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_VENDOR; ++ c->eax = cs->kvm_state->xen_version; ++ c->ebx = 0; ++ c->ecx = 0; ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_HVM_MSR; ++ /* Number of hypercall-transfer pages */ ++ c->eax = 1; ++ /* Hypercall MSR base address */ ++ if (hyperv_enabled(cpu)) { ++ c->ebx = XEN_HYPERCALL_MSR_HYPERV; ++ kvm_xen_init(cs->kvm_state, c->ebx); ++ } else { ++ c->ebx = XEN_HYPERCALL_MSR; ++ } ++ c->ecx = 0; ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_TIME; ++ c->eax = ((!!tsc_is_stable_and_known(env) << 1) | ++ (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); ++ /* default=0 (emulate if necessary) */ ++ c->ebx = 0; ++ /* guest tsc frequency */ ++ c->ecx = env->user_tsc_khz; ++ /* guest tsc incarnation (migration count) */ ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_HVM; ++ xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { ++ c->function = kvm_base + XEN_CPUID_HVM; ++ ++ if (cpu->xen_vapic) { ++ c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; ++ c->eax |= 
XEN_HVM_CPUID_X2APIC_VIRT; ++ } ++ ++ c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; ++ ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { ++ c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; ++ c->ebx = cs->cpu_index; ++ } ++ ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { ++ c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; ++ } ++ } ++ ++ r = kvm_xen_init_vcpu(cs); ++ if (r) { ++ return r; ++ } ++ ++ kvm_base += 0x100; ++#else /* CONFIG_XEN_EMU */ ++ /* This should never happen as kvm_arch_init() would have died first. */ ++ fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); ++ abort(); ++#endif ++ } else if (cpu->expose_kvm) { ++ memcpy(signature, "KVMKVMKVM\0\0\0", 12); ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = KVM_CPUID_SIGNATURE | kvm_base; ++ c->eax = KVM_CPUID_FEATURES | kvm_base; ++ c->ebx = signature[0]; ++ c->ecx = signature[1]; ++ c->edx = signature[2]; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = KVM_CPUID_FEATURES | kvm_base; ++ c->eax = env->features[FEAT_KVM]; ++ c->edx = env->features[FEAT_KVM_HINTS]; ++ } ++ ++ if (cpu->kvm_pv_enforce_cpuid) { ++ r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); ++ if (r < 0) { ++ fprintf(stderr, ++ "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", ++ strerror(-r)); ++ abort(); ++ } ++ } ++ ++ cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); + cpuid_data.cpuid.nent = cpuid_i; + + if (((env->cpuid_version >> 8)&0xF) >= 6 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch b/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch new file mode 100644 index 0000000..65d09ab --- /dev/null +++ b/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch @@ -0,0 +1,91 @@ +From 03e275023b482ac79b4f92ca4ceef6de3caa634f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:40 +0200 +Subject: [PATCH 045/100] i386: pc: remove unnecessary MachineClass overrides + 
+RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [45/91] c03d5b57014d0d02f6ce0cdfb19a34996d100dea (bonzini/rhel-qemu-kvm) + +There is no need to override these fields of MachineClass because they are +already set to the right value in the superclass. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-10-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b348fdcdac9f9fc70be9ae56c54e41765e9aae24) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 3 --- + hw/i386/x86.c | 6 +++--- + include/hw/i386/x86.h | 4 ---- + 3 files changed, 3 insertions(+), 10 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 660a59c63b..0aca0cc79e 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1979,9 +1979,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; +- mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +- mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; +- mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->has_hotpluggable_cpus = true; +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index c61f4ebfa6..fcef652c1e 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -443,7 +443,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + numa_cpu_pre_plug(cpu_slot, dev, errp); + } + +-CpuInstanceProperties ++static CpuInstanceProperties + x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -453,7 +453,7 @@ x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + return possible_cpus->cpus[cpu_index].props; + } + +-int64_t x86_get_default_cpu_node_id(const 
MachineState *ms, int idx) ++static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + { + X86CPUTopoIDs topo_ids; + X86MachineState *x86ms = X86_MACHINE(ms); +@@ -467,7 +467,7 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + return topo_ids.pkg_id % ms->numa_state->num_nodes; + } + +-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) ++static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) + { + X86MachineState *x86ms = X86_MACHINE(ms); + unsigned int max_cpus = ms->smp.max_cpus; +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index d7b7d3f3ce..c2062db13f 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -114,10 +114,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, + + void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); + void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +-CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, +- unsigned cpu_index); +-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); +-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); + CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); + void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); + void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch b/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch new file mode 100644 index 0000000..fce51aa --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch @@ -0,0 +1,116 @@ +From 652793962000d6906e219ceae36348a476b78c28 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 12:44:44 +0200 +Subject: [PATCH 065/100] i386/sev: Add a class method to determine KVM VM type + for SNP guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: 
RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [65/91] c6cbeac0a6f691138df212b80efaa9b1143fdaa8 (bonzini/rhel-qemu-kvm) + +SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM, +or KVM_X86_SEV_ES_VM depending on the configuration and what +the host kernel supports. SNP guests on the other hand can only +ever use KVM_X86_SNP_VM, so split determination of VM type out +into a separate class method that can be set accordingly for +sev-guest vs. sev-snp-guest objects and add handling for SNP. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com> +[Remove unnecessary function pointer declaration. - Paolo] +Signed-off-by: Paolo Bonzini +(cherry picked from commit a808132f6d8e855bd83a400570ec91d2e00bebe3) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 1 + + target/i386/sev.c | 15 ++++++++++++--- + 2 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 408568d053..75e75d9772 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -166,6 +166,7 @@ static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", + [KVM_X86_SEV_VM] = "SEV", + [KVM_X86_SEV_ES_VM] = "SEV-ES", ++ [KVM_X86_SNP_VM] = "SEV-SNP", + }; + + bool kvm_is_vm_type_supported(int type) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c3daaf1ad5..072cc4f853 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -885,6 +885,11 @@ out: + return sev_common->kvm_type; + } + ++static int sev_snp_kvm_type(X86ConfidentialGuest *cg) ++{ ++ return KVM_X86_SNP_VM; ++} ++ + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + char *devname; +@@ -894,6 +899,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + struct sev_user_data_status status = {}; + SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = 
SEV_COMMON_GET_CLASS(cgs); ++ X86ConfidentialGuestClass *x86_klass = ++ X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); + + sev_common->state = SEV_STATE_UNINIT; + +@@ -964,7 +971,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { ++ if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); +@@ -1441,10 +1448,8 @@ static void + sev_common_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); +- X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_common_kvm_init; +- x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_common_get_sev_device, +@@ -1529,10 +1534,12 @@ static void + sev_guest_class_init(ObjectClass *oc, void *data) + { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->kvm_init = sev_kvm_init; ++ x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +@@ -1770,8 +1777,10 @@ static void + sev_snp_guest_class_init(ObjectClass *oc, void *data) + { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_snp_kvm_init; ++ x86_klass->kvm_type = sev_snp_kvm_type; + + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch b/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch new file mode 
100644 index 0000000..d194994 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch @@ -0,0 +1,84 @@ +From 82a714b79851b5c2d1389d2fa7a01548c486a854 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:20 -0500 +Subject: [PATCH 060/100] i386/sev: Add a sev_snp_enabled() helper + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [60/91] c35ead095028ccfb1e1be0fe010ca4f7688530a0 (bonzini/rhel-qemu-kvm) + +Add a simple helper to check if the current guest type is SNP. Also have +SNP-enabled imply that SEV-ES is enabled as well, and fix up any places +where the sev_es_enabled() check is expecting a pure/non-SNP guest. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 99190f805dca9475fe244fbd8041961842657dc2) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 13 ++++++++++++- + target/i386/sev.h | 2 ++ + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index a81b3228d4..4edfedc139 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -325,12 +325,21 @@ sev_enabled(void) + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); + } + ++bool ++sev_snp_enabled(void) ++{ ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST); ++} ++ + bool + sev_es_enabled(void) + { + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + +- return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); ++ return sev_snp_enabled() || ++ (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + } + + uint32_t +@@ -946,7 +955,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + "support", __func__); + 
goto err; + } ++ } + ++ if (sev_es_enabled() && !sev_snp_enabled()) { + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { + error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", +diff --git a/target/i386/sev.h b/target/i386/sev.h +index bedc667eeb..94295ee74f 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -45,9 +45,11 @@ typedef struct SevKernelLoaderContext { + #ifdef CONFIG_SEV + bool sev_enabled(void); + bool sev_es_enabled(void); ++bool sev_snp_enabled(void); + #else + #define sev_enabled() 0 + #define sev_es_enabled() 0 ++#define sev_snp_enabled() 0 + #endif + + uint32_t sev_get_cbit_position(void); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch b/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch new file mode 100644 index 0000000..2bab2ac --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch @@ -0,0 +1,187 @@ +From 0e435819540b0d39da2c828aacc0f35ecaadbdf6 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:28 -0500 +Subject: [PATCH 068/100] i386/sev: Add handling to encrypt/finalize guest + launch data + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [68/91] fe77931d279aa8df061823da88a320fb5f72ffea (bonzini/rhel-qemu-kvm) + +Process any queued up launch data and encrypt/measure it into the SNP +guest instance prior to initial guest launch. + +This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial +update responses. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9f3a6999f9730a694d7db448a99f9c9cb6515992) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++- + target/i386/trace-events | 2 + + 2 files changed, 113 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e89b87d2f5..ef2e592ca7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -756,6 +756,76 @@ out: + return ret; + } + ++static const char * ++snp_page_type_to_str(int type) ++{ ++ switch (type) { ++ case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal"; ++ case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero"; ++ case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured"; ++ case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets"; ++ case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid"; ++ default: return "unknown"; ++ } ++} ++ ++static int ++sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, ++ SevLaunchUpdateData *data) ++{ ++ int ret, fw_error; ++ struct kvm_sev_snp_launch_update update = {0}; ++ ++ if (!data->hva || !data->len) { ++ error_report("SNP_LAUNCH_UPDATE called with invalid address" ++ "/ length: %p / %lx", ++ data->hva, data->len); ++ return 1; ++ } ++ ++ update.uaddr = (__u64)(unsigned long)data->hva; ++ update.gfn_start = data->gpa >> TARGET_PAGE_BITS; ++ update.len = data->len; ++ update.type = data->type; ++ ++ /* ++ * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private ++ * memory attribute set in advance. 
++ */ ++ ret = kvm_set_memory_attributes_private(data->gpa, data->len); ++ if (ret) { ++ error_report("SEV-SNP: failed to configure initial" ++ "private guest memory"); ++ goto out; ++ } ++ ++ while (update.len || ret == -EAGAIN) { ++ trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start << ++ TARGET_PAGE_BITS, update.len, ++ snp_page_type_to_str(update.type)); ++ ++ ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd, ++ KVM_SEV_SNP_LAUNCH_UPDATE, ++ &update, &fw_error); ++ if (ret && ret != -EAGAIN) { ++ error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ break; ++ } ++ } ++ ++out: ++ if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) { ++ error_report("SEV-SNP: expected update of GPA range %lx-%lx," ++ "got GPA range %lx-%llx", ++ data->gpa, data->gpa + data->len, data->gpa, ++ update.gfn_start << TARGET_PAGE_BITS); ++ ret = -EIO; ++ } ++ ++ return ret; ++} ++ + static int + sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + { +@@ -901,6 +971,46 @@ sev_launch_finish(SevCommonState *sev_common) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static void ++sev_snp_launch_finish(SevCommonState *sev_common) ++{ ++ int ret, error; ++ Error *local_err = NULL; ++ SevLaunchUpdateData *data; ++ SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; ++ ++ QTAILQ_FOREACH(data, &launch_update, next) { ++ ret = sev_snp_launch_update(sev_snp, data); ++ if (ret) { ++ exit(1); ++ } ++ } ++ ++ trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth, ++ sev_snp->host_data); ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, ++ finish, &error); ++ if (ret) { ++ error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'", ++ ret, error, fw_error_to_str(error)); ++ exit(1); ++ } ++ ++ sev_set_guest_state(sev_common, SEV_STATE_RUNNING); ++ ++ /* add migration 
blocker */ ++ error_setg(&sev_mig_blocker, ++ "SEV-SNP: Migration is not implemented"); ++ ret = migrate_add_blocker(&sev_mig_blocker, &local_err); ++ if (local_err) { ++ error_report_err(local_err); ++ error_free(sev_mig_blocker); ++ exit(1); ++ } ++} ++ ++ + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { +@@ -1832,10 +1942,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->launch_start = sev_snp_launch_start; ++ klass->launch_finish = sev_snp_launch_finish; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + +- + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index cb26d8a925..06b44ead2e 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -12,3 +12,5 @@ kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" + kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" ++kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)" ++kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s" +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch b/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch new file mode 100644 index 0000000..572dddc --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch @@ -0,0 +1,127 @@ +From 2872c423fa44dcbf50b581a5c3feac064a0473a0 Mon Sep 17 00:00:00 2001 
+From: Michael Roth +Date: Tue, 9 Apr 2024 18:07:41 -0500 +Subject: [PATCH 024/100] i386/sev: Add 'legacy-vm-type' parameter for SEV + guest objects + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [24/91] ce35d1b09fe8aa8772ff149543f7760455c1e6b5 (bonzini/rhel-qemu-kvm) + +QEMU will currently automatically make use of the KVM_SEV_INIT2 API for +initializing SEV and SEV-ES guests verses the older +KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces. + +However, the older interfaces will silently avoid sync'ing FPU/XSAVE +state to the VMSA prior to encryption, thus relying on behavior and +measurements that assume the related fields to be allow zero. + +With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in +measurements changes and, theoretically, behaviorial changes, though the +latter are unlikely to be seen in practice. + +To allow a smooth transition to the newer interface, while still +providing a mechanism to maintain backward compatibility with VMs +created using the older interfaces, provide a new command-line +parameter: + + -object sev-guest,legacy-vm-type=true,... + +and have it default to false. + +Signed-off-by: Michael Roth +Message-ID: <20240409230743.962513-2-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 023267334da375226720e62963df9545aa8fc2fd) +Signed-off-by: Paolo Bonzini +--- + qapi/qom.json | 11 ++++++++++- + target/i386/sev.c | 18 +++++++++++++++++- + 2 files changed, 27 insertions(+), 2 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 85e6b4f84a..38dde6d785 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -898,6 +898,14 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. 
++# The newer KVM_SEV_INIT2 interface syncs additional vCPU ++# state when initializing the VMSA structures, which will ++# result in a different guest measurement. Set this to ++# maintain compatibility with older QEMU or kernel versions ++# that rely on legacy KVM_SEV_INIT behavior. ++# (default: false) (since 9.1) ++# + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +@@ -908,7 +916,8 @@ + '*handle': 'uint32', + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool' } } ++ '*kernel-hashes': 'bool', ++ '*legacy-vm-type': 'bool' } } + + ## + # @ThreadContextProperties: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 9dab4060b8..f4ee317cb0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -67,6 +67,7 @@ struct SevGuestState { + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; ++ bool legacy_vm_type; + + /* runtime state */ + uint32_t handle; +@@ -356,6 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) + sev->kernel_hashes = value; + } + ++static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++{ ++ return SEV_GUEST(obj)->legacy_vm_type; ++} ++ ++static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++{ ++ SEV_GUEST(obj)->legacy_vm_type = value; ++} ++ + bool + sev_enabled(void) + { +@@ -863,7 +874,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) + } + + kvm_type = (sev->policy & SEV_POLICY_ES) ? 
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type)) { ++ if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { + sev->kvm_type = kvm_type; + } else { + sev->kvm_type = KVM_X86_DEFAULT_VM; +@@ -1381,6 +1392,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); ++ object_class_property_add_bool(oc, "legacy-vm-type", ++ sev_guest_get_legacy_vm_type, ++ sev_guest_set_legacy_vm_type); ++ object_class_property_set_description(oc, "legacy-vm-type", ++ "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); + } + + static void +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch b/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch new file mode 100644 index 0000000..ca1338c --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch @@ -0,0 +1,203 @@ +From a236548a903aa8350fff9601d481b2f529c8d4a7 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:21 -0500 +Subject: [PATCH 061/100] i386/sev: Add sev_kvm_init() override for SEV class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [61/91] b24fcbc8712e7394e029312229da023c63803969 (bonzini/rhel-qemu-kvm) + +Some aspects of the init routine SEV are specific to SEV and not +applicable for SNP guests, so move the SEV-specific bits into +separate class method and retain only the common functionality. 
+ +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 990da8d243a8c59dafcbed78b56a0e4ffb1605d9) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 72 +++++++++++++++++++++++++++++++++-------------- + 1 file changed, 51 insertions(+), 21 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 4edfedc139..5519de1c6b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -73,6 +73,7 @@ struct SevCommonStateClass { + /* public */ + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); ++ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + }; + + /** +@@ -882,7 +883,7 @@ out: + return sev_common->kvm_type; + } + +-static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; +@@ -892,12 +893,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + struct sev_user_data_status status = {}; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + +- ret = ram_block_discard_disable(true); +- if (ret) { +- error_report("%s: cannot disable RAM discard", __func__); +- return -1; +- } +- + sev_common->state = SEV_STATE_UNINIT; + + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); +@@ -911,7 +906,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (host_cbitpos != sev_common->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", + __func__, host_cbitpos, sev_common->cbitpos); +- goto err; ++ return -1; + } + + /* +@@ -924,7 +919,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, 
requested '%d'", + __func__, sev_common->reduced_phys_bits); +- goto err; ++ return -1; + } + + devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); +@@ -933,7 +928,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); +- goto err; ++ return -1; + } + g_free(devname); + +@@ -943,7 +938,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: failed to get platform status ret=%d " + "fw_error='%d: %s'", __func__, ret, fw_error, + fw_error_to_str(fw_error)); +- goto err; ++ return -1; + } + sev_common->build_id = status.build; + sev_common->api_major = status.api_major; +@@ -953,7 +948,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (!kvm_kernel_irqchip_allowed()) { + error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" + "support", __func__); +- goto err; ++ return -1; + } + } + +@@ -962,7 +957,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); +- goto err; ++ return -1; + } + } + +@@ -980,25 +975,59 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +- goto err; ++ return -1; + } + + ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); +- goto err; ++ return -1; ++ } ++ ++ if (klass->kvm_init && klass->kvm_init(cgs, errp)) { ++ return -1; + } + +- ram_block_notifier_add(&sev_ram_notifier); +- qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + cgs->ready = true; + + return 0; +-err: +- 
ram_block_discard_disable(false); +- return -1; ++} ++ ++static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ int ret; ++ ++ /* ++ * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding ++ * isn't actually possible. With SNP, only guest_memfd pages are used ++ * for private guest memory, so discarding of shared memory is still ++ * possible.. ++ */ ++ ret = ram_block_discard_disable(true); ++ if (ret) { ++ error_setg(errp, "%s: cannot disable RAM discard", __func__); ++ return -1; ++ } ++ ++ /* ++ * SEV uses these notifiers to register/pin pages prior to guest use, ++ * but SNP relies on guest_memfd for private pages, which has its ++ * own internal mechanisms for registering/pinning private memory. ++ */ ++ ram_block_notifier_add(&sev_ram_notifier); ++ ++ /* ++ * The machine done notify event is used for SEV guests to get the ++ * measurement of the encrypted images. When SEV-SNP is enabled, the ++ * measurement is part of the guest attestation process where it can ++ * be collected without any reliance on the VMM. So skip registering ++ * the notifier for SNP in favor of using guest attestation instead. 
++ */ ++ qemu_add_machine_init_done_notifier(&sev_machine_done_notify); ++ ++ return 0; + } + + int +@@ -1397,7 +1426,7 @@ sev_common_class_init(ObjectClass *oc, void *data) + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + +- klass->kvm_init = sev_kvm_init; ++ klass->kvm_init = sev_common_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", +@@ -1486,6 +1515,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; ++ klass->kvm_init = sev_kvm_init; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch b/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch new file mode 100644 index 0000000..0db345c --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch @@ -0,0 +1,94 @@ +From 35ceebdeccbf5dceb374c6f89a12e9981def570b Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:22 -0500 +Subject: [PATCH 062/100] i386/sev: Add snp_kvm_init() override for SNP class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [62/91] 8fa537961c9262b99a4ffb99e1c25f080d76d1de (bonzini/rhel-qemu-kvm) + +SNP does not support SMM and requires guest_memfd for +private guest memory, so add SNP specific kvm_init() +functionality in snp_kvm_init() class method. 
+ +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 125b95a6d465a03ff30816eff0b1889aec01f0c3) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 5519de1c6b..6525b3c1a0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -885,12 +885,12 @@ out: + + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; ++ SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + + sev_common->state = SEV_STATE_UNINIT; +@@ -1030,6 +1030,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return 0; + } + ++static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ X86MachineState *x86ms = X86_MACHINE(ms); ++ ++ if (x86ms->smm == ON_OFF_AUTO_AUTO) { ++ x86ms->smm = ON_OFF_AUTO_OFF; ++ } else if (x86ms->smm == ON_OFF_AUTO_ON) { ++ error_setg(errp, "SEV-SNP does not support SMM."); ++ return -1; ++ } ++ ++ return 0; ++} ++ + int + sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + { +@@ -1752,6 +1767,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) + static void + sev_snp_guest_class_init(ObjectClass *oc, void *data) + { ++ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ ++ klass->kvm_init = sev_snp_kvm_init; ++ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +@@ -1778,8 +1797,11 @@ sev_snp_guest_class_init(ObjectClass 
*oc, void *data) + static void + sev_snp_guest_instance_init(Object *obj) + { ++ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + ++ cgs->require_guest_memfd = true; ++ + /* default init/start/finish params for kvm */ + sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch b/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch new file mode 100644 index 0000000..c10f75f --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch @@ -0,0 +1,262 @@ +From 4013364679757161d6b9754bfc33ae38be0a1b7f Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:32 -0500 +Subject: [PATCH 072/100] i386/sev: Add support for SNP CPUID validation + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [72/91] 080e2942552dc7de8966e69d0d0d3b8951392030 (bonzini/rhel-qemu-kvm) + +SEV-SNP firmware allows a special guest page to be populated with a +table of guest CPUID values so that they can be validated through +firmware before being loaded into encrypted guest memory where they can +be used in place of hypervisor-provided values[1]. + +As part of SEV-SNP guest initialization, use this interface to validate +the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest +start and populate the CPUID page reserved by OVMF with the resulting +encrypted data. + +[1] SEV SNP Firmware ABI Specification, Rev. 
0.8, 8.13.2.6 + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 70943ad8e4dfbe5f77006b880290219be9d03553) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 162 insertions(+), 2 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c57534fca2..06401f0526 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -200,6 +200,36 @@ static const char *const sev_fw_errlist[] = { + + #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) + ++/* doesn't expose this, so re-use the max from kvm.c */ ++#define KVM_MAX_CPUID_ENTRIES 100 ++ ++typedef struct KvmCpuidInfo { ++ struct kvm_cpuid2 cpuid; ++ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; ++} KvmCpuidInfo; ++ ++#define SNP_CPUID_FUNCTION_MAXCOUNT 64 ++#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF ++ ++typedef struct { ++ uint32_t eax_in; ++ uint32_t ecx_in; ++ uint64_t xcr0_in; ++ uint64_t xss_in; ++ uint32_t eax; ++ uint32_t ebx; ++ uint32_t ecx; ++ uint32_t edx; ++ uint64_t reserved; ++} __attribute__((packed)) SnpCpuidFunc; ++ ++typedef struct { ++ uint32_t count; ++ uint32_t reserved1; ++ uint64_t reserved2; ++ SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT]; ++} __attribute__((packed)) SnpCpuidInfo; ++ + static int + sev_ioctl(int fd, int cmd, void *data, int *error) + { +@@ -788,6 +818,35 @@ out: + return ret; + } + ++static void ++sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, ++ SnpCpuidInfo *new) ++{ ++ size_t i; ++ ++ if (old->count != new->count) { ++ error_report("SEV-SNP: CPUID validation failed due to count mismatch," ++ "provided: %d, expected: %d", old->count, new->count); ++ return; ++ } ++ ++ for (i = 0; i < old->count; i++) { ++ SnpCpuidFunc *old_func, *new_func; ++ ++ old_func = &old->entries[i]; ++ new_func = &new->entries[i]; ++ ++ if (memcmp(old_func, 
new_func, sizeof(SnpCpuidFunc))) { ++ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" ++ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" ++ "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", ++ old_func->eax_in, old_func->ecx_in, ++ old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, ++ new_func->eax, new_func->ebx, new_func->ecx, new_func->edx); ++ } ++ } ++} ++ + static const char * + snp_page_type_to_str(int type) + { +@@ -806,6 +865,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + SevLaunchUpdateData *data) + { + int ret, fw_error; ++ SnpCpuidInfo snp_cpuid_info; + struct kvm_sev_snp_launch_update update = {0}; + + if (!data->hva || !data->len) { +@@ -815,6 +875,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + return 1; + } + ++ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ /* Save a copy for comparison in case the LAUNCH_UPDATE fails */ ++ memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info)); ++ } ++ + update.uaddr = (__u64)(unsigned long)data->hva; + update.gfn_start = data->gpa >> TARGET_PAGE_BITS; + update.len = data->len; +@@ -842,6 +907,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + if (ret && ret != -EAGAIN) { + error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); ++ ++ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva); ++ error_report("SEV-SNP: failed update CPUID page"); ++ } + break; + } + } +@@ -1004,7 +1074,8 @@ sev_launch_finish(SevCommonState *sev_common) + } + + static int +-snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) ++snp_launch_update_data(uint64_t gpa, void *hva, ++ uint32_t len, int type) + { + SevLaunchUpdateData *data; + +@@ -1019,6 +1090,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) + return 0; + } + ++static int ++sev_snp_cpuid_info_fill(SnpCpuidInfo 
*snp_cpuid_info, ++ const KvmCpuidInfo *kvm_cpuid_info) ++{ ++ size_t i; ++ ++ if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { ++ error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", ++ kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); ++ return -1; ++ } ++ ++ memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info)); ++ ++ for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) { ++ const struct kvm_cpuid_entry2 *kvm_cpuid_entry; ++ SnpCpuidFunc *snp_cpuid_entry; ++ ++ kvm_cpuid_entry = &kvm_cpuid_info->entries[i]; ++ snp_cpuid_entry = &snp_cpuid_info->entries[i]; ++ ++ snp_cpuid_entry->eax_in = kvm_cpuid_entry->function; ++ if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) { ++ snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index; ++ } ++ snp_cpuid_entry->eax = kvm_cpuid_entry->eax; ++ snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx; ++ snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx; ++ snp_cpuid_entry->edx = kvm_cpuid_entry->edx; ++ ++ /* ++ * Guest kernels will calculate EBX themselves using the 0xD ++ * subfunctions corresponding to the individual XSAVE areas, so only ++ * encode the base XSAVE size in the initial leaves, corresponding ++ * to the initial XCR0=1 state. 
++ */ ++ if (snp_cpuid_entry->eax_in == 0xD && ++ (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) { ++ snp_cpuid_entry->ebx = 0x240; ++ snp_cpuid_entry->xcr0_in = 1; ++ snp_cpuid_entry->xss_in = 0; ++ } ++ } ++ ++ snp_cpuid_info->count = i; ++ ++ return 0; ++} ++ ++static int ++snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) ++{ ++ KvmCpuidInfo kvm_cpuid_info = {0}; ++ SnpCpuidInfo snp_cpuid_info; ++ CPUState *cs = first_cpu; ++ int ret; ++ uint32_t i = 0; ++ ++ assert(sizeof(snp_cpuid_info) <= cpuid_len); ++ ++ /* get the cpuid list from KVM */ ++ do { ++ kvm_cpuid_info.cpuid.nent = ++i; ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info); ++ } while (ret == -E2BIG); ++ ++ if (ret) { ++ error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", ++ strerror(-ret)); ++ return 1; ++ } ++ ++ ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); ++ if (ret) { ++ error_report("SEV-SNP: failed to generate CPUID table information"); ++ return 1; ++ } ++ ++ memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); ++ ++ return snp_launch_update_data(cpuid_addr, hva, cpuid_len, ++ KVM_SEV_SNP_PAGE_TYPE_CPUID); ++} ++ + static int + snp_metadata_desc_to_page_type(int desc_type) + { +@@ -1053,7 +1208,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + exit(1); + } + +- ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ ret = snp_launch_update_cpuid(desc->base, hva, desc->len); ++ } else { ++ ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ } ++ + if (ret) { + error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", + __func__, desc->base, desc->len, desc->type); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch b/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch new file mode 100644 index 0000000..4691679 --- /dev/null +++ 
b/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch @@ -0,0 +1,127 @@ +From b2cfd4d89026e76ba86ea7adea323f2c3a588790 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:31 -0500 +Subject: [PATCH 071/100] i386/sev: Add support for populating OVMF metadata + pages + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [71/91] b563442c0e2f6ea01937425d300b56d9e641fd57 (bonzini/rhel-qemu-kvm) + +OVMF reserves various pages so they can be pre-initialized/validated +prior to launching the guest. Add support for populating these pages +with the expected content. + +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d8c2a7f4806ff39423312e503737fd76c34dcae) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 74 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 17281bb2c7..c57534fca2 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1003,15 +1003,89 @@ sev_launch_finish(SevCommonState *sev_common) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static int ++snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) ++{ ++ SevLaunchUpdateData *data; ++ ++ data = g_new0(SevLaunchUpdateData, 1); ++ data->gpa = gpa; ++ data->hva = hva; ++ data->len = len; ++ data->type = type; ++ ++ QTAILQ_INSERT_TAIL(&launch_update, data, next); ++ ++ return 0; ++} ++ ++static int ++snp_metadata_desc_to_page_type(int desc_type) ++{ ++ switch (desc_type) { ++ /* Add the umeasured prevalidated pages as a zero page */ ++ case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ case 
SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS; ++ case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID; ++ default: ++ return KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ } ++} ++ ++static void ++snp_populate_metadata_pages(SevSnpGuestState *sev_snp, ++ OvmfSevMetadata *metadata) ++{ ++ OvmfSevMetadataDesc *desc; ++ int type, ret, i; ++ void *hva; ++ MemoryRegion *mr = NULL; ++ ++ for (i = 0; i < metadata->num_desc; i++) { ++ desc = &metadata->descs[i]; ++ ++ type = snp_metadata_desc_to_page_type(desc->type); ++ ++ hva = gpa2hva(&mr, desc->base, desc->len, NULL); ++ if (!hva) { ++ error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x", ++ __func__, desc->base, desc->len); ++ exit(1); ++ } ++ ++ ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ if (ret) { ++ error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", ++ __func__, desc->base, desc->len, desc->type); ++ exit(1); ++ } ++ } ++} ++ + static void + sev_snp_launch_finish(SevCommonState *sev_common) + { + int ret, error; + Error *local_err = NULL; ++ OvmfSevMetadata *metadata; + SevLaunchUpdateData *data; + SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + ++ /* ++ * To boot the SNP guest, the hypervisor is required to populate the CPUID ++ * and Secrets page before finalizing the launch flow. The location of ++ * the secrets and CPUID page is available through the OVMF metadata GUID. 
++ */ ++ metadata = pc_system_get_ovmf_sev_metadata_ptr(); ++ if (metadata == NULL) { ++ error_report("%s: Failed to locate SEV metadata header", __func__); ++ exit(1); ++ } ++ ++ /* Populate all the metadata pages */ ++ snp_populate_metadata_pages(sev_snp, metadata); ++ + QTAILQ_FOREACH(data, &launch_update, next) { + ret = sev_snp_launch_update(sev_snp, data); + if (ret) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch b/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch new file mode 100644 index 0000000..5da793f --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch @@ -0,0 +1,122 @@ +From 0f7432f2b968298b64fd243df793b176f67a538f Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:27 -0500 +Subject: [PATCH 067/100] i386/sev: Add the SNP launch start context + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [67/91] 63759a25a413a7a9a7274fb4c3b8bc2528634855 (bonzini/rhel-qemu-kvm) + +The SNP_LAUNCH_START is called first to create a cryptographic launch +context within the firmware. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d3107f882ec22cfb211eab7efa0c4e95f5ce11bb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++++ + target/i386/trace-events | 1 + + 2 files changed, 40 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 43d1c48bd9..e89b87d2f5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -39,6 +39,7 @@ + #include "confidential-guest.h" + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" ++#include "qemu/queue.h" + + OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) +@@ -115,6 +116,16 @@ struct SevSnpGuestState { + #define DEFAULT_SEV_DEVICE "/dev/sev" + #define DEFAULT_SEV_SNP_POLICY 0x30000 + ++typedef struct SevLaunchUpdateData { ++ QTAILQ_ENTRY(SevLaunchUpdateData) next; ++ hwaddr gpa; ++ void *hva; ++ uint64_t len; ++ int type; ++} SevLaunchUpdateData; ++ ++static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; ++ + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" + typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ +@@ -674,6 +685,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + return 0; + } + ++static int ++sev_snp_launch_start(SevCommonState *sev_common) ++{ ++ int fw_error, rc; ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); ++ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; ++ ++ trace_kvm_sev_snp_launch_start(start->policy, ++ sev_snp_guest->guest_visible_workarounds); ++ ++ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, ++ start, &fw_error); ++ if (rc < 0) { ++ error_report("%s: SNP_LAUNCH_START ret=%d 
fw_error=%d '%s'", ++ __func__, rc, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ ++ QTAILQ_INIT(&launch_update); ++ ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); ++ ++ return 0; ++} ++ + static int + sev_launch_start(SevCommonState *sev_common) + { +@@ -1003,6 +1039,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + ret = klass->launch_start(sev_common); ++ + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + return -1; +@@ -1794,9 +1831,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->launch_start = sev_snp_launch_start; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + ++ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 2cd8726eeb..cb26d8a925 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s" + kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" ++kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch b/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch new file mode 100644 index 0000000..f809242 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch @@ -0,0 +1,237 @@ +From ec786a1ec0a76775e980862d77500f5196a937e3 Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 
May 2024 06:16:35 -0500 +Subject: [PATCH 080/100] i386/sev: Allow measured direct kernel boot on SNP + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [80/91] 11c629862519c1a279566febf5a537c63c5fcf61 (bonzini/rhel-qemu-kvm) + +In SNP, the hashes page designated with a specific metadata entry +published in AmdSev OVMF. + +Therefore, if the user enabled kernel hashes (for measured direct boot), +QEMU should prepare the content of hashes table, and during the +processing of the metadata entry it copy the content into the designated +page and encrypt it. + +Note that in SNP (unlike SEV and SEV-ES) the measurements is done in +whole 4KB pages. Therefore QEMU zeros the whole page that includes the +hashes table, and fills in the kernel hashes area in that page, and then +encrypts the whole page. The rest of the page is reserved for SEV +launch secrets which are not usable anyway on SNP. + +If the user disabled kernel hashes, QEMU pre-validates the kernel hashes +page as a zero page. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c1996992cc882b00139f78067d6a64e2ec9cb0d8) +Signed-off-by: Paolo Bonzini +--- + include/hw/i386/pc.h | 2 + + target/i386/sev.c | 111 ++++++++++++++++++++++++++++++++----------- + 2 files changed, 85 insertions(+), 28 deletions(-) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 94b49310f5..ee3bfb7be9 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -175,6 +175,8 @@ typedef enum { + SEV_DESC_TYPE_SNP_SECRETS, + /* The section contains address that can be used as a CPUID page */ + SEV_DESC_TYPE_CPUID, ++ /* The section contains the region for kernel hashes for measured direct boot */ ++ SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10, + + } ovmf_sev_metadata_desc_type; + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 3fce4c08eb..004c667ac1 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -115,6 +115,10 @@ struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + + /* public */ ++ bool (*build_kernel_loader_hashes)(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp); + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); +@@ -154,6 +158,9 @@ struct SevSnpGuestState { + + struct kvm_sev_snp_launch_start kvm_start_conf; + struct kvm_sev_snp_launch_finish kvm_finish_conf; ++ ++ uint32_t kernel_hashes_offset; ++ PaddedSevHashTable *kernel_hashes_data; + }; + + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ +@@ -1189,6 +1196,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) + KVM_SEV_SNP_PAGE_TYPE_CPUID); + } + ++static int 
++snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, ++ void *hva, uint32_t len) ++{ ++ int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ if (sev_snp->parent_obj.kernel_hashes) { ++ assert(sev_snp->kernel_hashes_data); ++ assert((sev_snp->kernel_hashes_offset + ++ sizeof(*sev_snp->kernel_hashes_data)) <= len); ++ memset(hva, 0, len); ++ memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data, ++ sizeof(*sev_snp->kernel_hashes_data)); ++ type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; ++ } ++ return snp_launch_update_data(addr, hva, len, type); ++} ++ + static int + snp_metadata_desc_to_page_type(int desc_type) + { +@@ -1225,6 +1249,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + + if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + ret = snp_launch_update_cpuid(desc->base, hva, desc->len); ++ } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { ++ ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, ++ desc->len); + } else { + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + } +@@ -1823,6 +1850,58 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, + return true; + } + ++static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp) ++{ ++ /* ++ * SNP: Populate the hashes table in an area that later in ++ * snp_launch_update_kernel_hashes() will be copied to the guest memory ++ * and encrypted. 
++ */ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); ++ sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK; ++ sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1); ++ return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp); ++} ++ ++static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp) ++{ ++ PaddedSevHashTable *padded_ht; ++ hwaddr mapped_len = sizeof(*padded_ht); ++ MemTxAttrs attrs = { 0 }; ++ bool ret = true; ++ ++ /* ++ * Populate the hashes table in the guest's memory at the OVMF-designated ++ * area for the SEV hashes table ++ */ ++ padded_ht = address_space_map(&address_space_memory, area->base, ++ &mapped_len, true, attrs); ++ if (!padded_ht || mapped_len != sizeof(*padded_ht)) { ++ error_setg(errp, "SEV: cannot map hashes table guest memory area"); ++ return false; ++ } ++ ++ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { ++ ret = false; ++ } ++ ++ address_space_unmap(&address_space_memory, padded_ht, ++ mapped_len, true, mapped_len); ++ ++ return ret; ++} ++ + /* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. 
+@@ -1831,11 +1910,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + { + uint8_t *data; + SevHashTableDescriptor *area; +- PaddedSevHashTable *padded_ht; +- hwaddr mapped_len = sizeof(*padded_ht); +- MemTxAttrs attrs = { 0 }; +- bool ret = true; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly +@@ -1858,30 +1934,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return false; + } + +- /* +- * Populate the hashes table in the guest's memory at the OVMF-designated +- * area for the SEV hashes table +- */ +- padded_ht = address_space_map(&address_space_memory, area->base, +- &mapped_len, true, attrs); +- if (!padded_ht || mapped_len != sizeof(*padded_ht)) { +- error_setg(errp, "SEV: cannot map hashes table guest memory area"); +- return false; +- } +- +- if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { +- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, +- sizeof(*padded_ht), errp) < 0) { +- ret = false; +- } +- } else { +- ret = false; +- } +- +- address_space_unmap(&address_space_memory, padded_ht, +- mapped_len, true, mapped_len); +- +- return ret; ++ return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp); + } + + static char * +@@ -1998,6 +2051,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes; + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->launch_update_data = sev_launch_update_data; +@@ -2242,6 +2296,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass 
= X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes; + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; + klass->launch_update_data = sev_snp_launch_update_data; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch b/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch new file mode 100644 index 0000000..aacb0da --- /dev/null +++ b/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch @@ -0,0 +1,268 @@ +From ab6197309551bd6ddd9f8239191f68dfac23684b Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 9 Jul 2024 23:10:05 -0500 +Subject: [PATCH 090/100] i386/sev: Don't allow automatic fallback to legacy + KVM_SEV*_INIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [90/91] 2b1345faa56f993bb6e13d63e11656c784e20412 (bonzini/rhel-qemu-kvm) + +Currently if the 'legacy-vm-type' property of the sev-guest object is +'on', QEMU will attempt to use the newer KVM_SEV_INIT2 kernel +interface in conjunction with the newer KVM_X86_SEV_VM and +KVM_X86_SEV_ES_VM KVM VM types. + +This can lead to measurement changes if, for instance, an SEV guest was +created on a host that originally had an older kernel that didn't +support KVM_SEV_INIT2, but is booted on the same host later on after the +host kernel was upgraded. + +Instead, if legacy-vm-type is 'off', QEMU should fail if the +KVM_SEV_INIT2 interface is not provided by the current host kernel. +Modify the fallback handling accordingly. + +In the future, VMSA features and other flags might be added to QEMU +which will require legacy-vm-type to be 'off' because they will rely +on the newer KVM_SEV_INIT2 interface. 
It may be difficult to convey to +users what values of legacy-vm-type are compatible with which +features/options, so as part of this rework, switch legacy-vm-type to a +tri-state OnOffAuto option. 'auto' in this case will automatically +switch to using the newer KVM_SEV_INIT2, but only if it is required to +make use of new VMSA features or other options only available via +KVM_SEV_INIT2. + +Defining 'auto' in this way would avoid inadvertantly breaking +compatibility with older kernels since it would only be used in cases +where users opt into newer features that are only available via +KVM_SEV_INIT2 and newer kernels, and provide better default behavior +than the legacy-vm-type=off behavior that was previously in place, so +make it the default for 9.1+ machine types. + +Cc: Daniel P. Berrangé +Cc: Paolo Bonzini +cc: kvm@vger.kernel.org +Signed-off-by: Michael Roth +Reviewed-by: Daniel P. Berrangé +Link: https://lore.kernel.org/r/20240710041005.83720-1-michael.roth@amd.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9d38d9dca2a81aaf5752d45d221021ef96d496cd) + +RHEL: adjust compatiility setting, applying it to 9.4 machine type +--- + hw/i386/pc.c | 2 +- + qapi/qom.json | 18 ++++++---- + target/i386/sev.c | 85 +++++++++++++++++++++++++++++++++++++++-------- + 3 files changed, 83 insertions(+), 22 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b25d075b59..e9c5ea5d8f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -352,7 +352,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + GlobalProperty pc_rhel_9_5_compat[] = { + /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ + { TYPE_X86_CPU, "guest-phys-bits", "0" }, +- { "sev-guest", "legacy-vm-type", "true" }, ++ { "sev-guest", "legacy-vm-type", "on" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/qapi/qom.json b/qapi/qom.json +index 8bd299265e..17bd5a0cf7 100644 +--- a/qapi/qom.json ++++ 
b/qapi/qom.json +@@ -912,12 +912,16 @@ + # @handle: SEV firmware handle (default: 0) + # + # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. +-# The newer KVM_SEV_INIT2 interface syncs additional vCPU +-# state when initializing the VMSA structures, which will +-# result in a different guest measurement. Set this to +-# maintain compatibility with older QEMU or kernel versions +-# that rely on legacy KVM_SEV_INIT behavior. +-# (default: false) (since 9.1) ++# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs ++# additional vCPU state when initializing the VMSA structures, ++# which will result in a different guest measurement. Set ++# this to 'on' to force compatibility with older QEMU or kernel ++# versions that rely on legacy KVM_SEV_INIT behavior. 'auto' ++# will behave identically to 'on', but will automatically ++# switch to using KVM_SEV_INIT2 if the user specifies any ++# additional options that require it. If set to 'off', QEMU ++# will require KVM_SEV_INIT2 unconditionally. ++# (default: off) (since 9.1) + # + # Since: 2.12 + ## +@@ -927,7 +931,7 @@ + '*session-file': 'str', + '*policy': 'uint32', + '*handle': 'uint32', +- '*legacy-vm-type': 'bool' } } ++ '*legacy-vm-type': 'OnOffAuto' } } + + ## + # @SevSnpGuestProperties: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 491fab74fd..b921defb63 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -144,7 +144,7 @@ struct SevGuestState { + uint32_t policy; + char *dh_cert_file; + char *session_file; +- bool legacy_vm_type; ++ OnOffAuto legacy_vm_type; + }; + + struct SevSnpGuestState { +@@ -1334,6 +1334,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++/* ++ * This helper is to examine sev-guest properties and determine if any options ++ * have been set which rely on the newer KVM_SEV_INIT2 interface and associated ++ * KVM VM types. 
++ */ ++static bool sev_init2_required(SevGuestState *sev_guest) ++{ ++ /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */ ++ return false; ++} ++ + static int sev_kvm_type(X86ConfidentialGuest *cg) + { + SevCommonState *sev_common = SEV_COMMON(cg); +@@ -1344,14 +1355,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) + goto out; + } + ++ /* These are the only cases where legacy VM types can be used. */ ++ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON || ++ (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO && ++ !sev_init2_required(sev_guest))) { ++ sev_common->kvm_type = KVM_X86_DEFAULT_VM; ++ goto out; ++ } ++ ++ /* ++ * Newer VM types are required, either explicitly via legacy-vm-type=on, or ++ * implicitly via legacy-vm-type=auto along with additional sev-guest ++ * properties that require the newer VM types. ++ */ + kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? + KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { +- sev_common->kvm_type = kvm_type; +- } else { +- sev_common->kvm_type = KVM_X86_DEFAULT_VM; ++ if (!kvm_is_vm_type_supported(kvm_type)) { ++ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) { ++ error_report("SEV: host kernel does not support requested %s VM type, which is required " ++ "for the set of options specified. To allow use of the legacy " ++ "KVM_X86_DEFAULT_VM VM type, please disable any options that are not " ++ "compatible with the legacy VM type, or upgrade your kernel.", ++ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); ++ } else { ++ error_report("SEV: host kernel does not support requested %s VM type. To allow use of " ++ "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument " ++ "must be set to 'on' or 'auto' for the sev-guest object.", ++ kvm_type == KVM_X86_SEV_VM ? 
"KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); ++ } ++ ++ return -1; + } + ++ sev_common->kvm_type = kvm_type; + out: + return sev_common->kvm_type; + } +@@ -1442,14 +1478,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { ++ switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { ++ case KVM_X86_DEFAULT_VM: + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); +- } else { ++ break; ++ case KVM_X86_SEV_VM: ++ case KVM_X86_SEV_ES_VM: ++ case KVM_X86_SNP_VM: { + struct kvm_sev_init args = { 0 }; + + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ break; ++ } ++ default: ++ error_setg(errp, "%s: host kernel does not support the requested SEV configuration.", ++ __func__); ++ return -1; + } + + if (ret) { +@@ -2037,14 +2083,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp) + SEV_GUEST(obj)->session_file = g_strdup(value); + } + +-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { +- return SEV_GUEST(obj)->legacy_vm_type; ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type; ++ ++ visit_type_OnOffAuto(v, name, &legacy_vm_type, errp); + } + +-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { +- SEV_GUEST(obj)->legacy_vm_type = value; ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ ++ visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp); + } + + static void +@@ -2070,9 +2125,9 @@ sev_guest_class_init(ObjectClass *oc, void *data) + 
sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "legacy-vm-type", +- sev_guest_get_legacy_vm_type, +- sev_guest_set_legacy_vm_type); ++ object_class_property_add(oc, "legacy-vm-type", "OnOffAuto", ++ sev_guest_get_legacy_vm_type, ++ sev_guest_set_legacy_vm_type, NULL, NULL); + object_class_property_set_description(oc, "legacy-vm-type", + "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); + } +@@ -2088,6 +2143,8 @@ sev_guest_instance_init(Object *obj) + object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); ++ ++ sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO; + } + + /* guest info specific sev/sev-es */ +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch b/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch new file mode 100644 index 0000000..739a145 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch @@ -0,0 +1,46 @@ +From ebb3c3536366c383fa09b0987a4efb68d018b7b8 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:24 -0500 +Subject: [PATCH 064/100] i386/sev: Don't return launch measurements for + SEV-SNP guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [64/91] 5a29bb2d8b5a07aec6fd271ec37345e665e9cce4 (bonzini/rhel-qemu-kvm) + +For SEV-SNP guests, launch measurement is queried from within the guest +during attestation, so don't attempt to return it as part of +query-sev-launch-measure. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 73ae63b162fc1fed520f53ad200712964d7d0264) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 6525b3c1a0..c3daaf1ad5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -795,7 +795,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + + static char *sev_get_launch_measurement(void) + { +- SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ SevGuestState *sev_guest = ++ (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); + + if (sev_guest && + SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch b/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch new file mode 100644 index 0000000..e438cd3 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch @@ -0,0 +1,54 @@ +From 0612c7ed587422ec7e07c27c8ca11b89c7aa8b02 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:43 -0500 +Subject: [PATCH 077/100] i386/sev: Enable KVM_HC_MAP_GPA_RANGE hcall for SNP + guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [77/91] 3c494eb54499c24121cc2c47045626478b8bb41e (bonzini/rhel-qemu-kvm) + +KVM will forward GHCB page-state change requests to userspace in the +form of KVM_HC_MAP_GPA_RANGE, so make sure the hypercall handling is +enabled for SNP guests. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-32-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit e3cddff93c1f88fea3b26841e792dc0be6b6fae8) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index eaf5fc6c6b..abb63062ac 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + + #include ++#include + #include + + #include +@@ -758,6 +759,10 @@ sev_snp_launch_start(SevCommonState *sev_common) + trace_kvm_sev_snp_launch_start(start->policy, + sev_snp_guest->guest_visible_workarounds); + ++ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { ++ return 1; ++ } ++ + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, + start, &fw_error); + if (rc < 0) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch b/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch new file mode 100644 index 0000000..a06301d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch @@ -0,0 +1,167 @@ +From eed17520567c202f53ab767bfd42cfe303838772 Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:33 -0500 +Subject: [PATCH 078/100] i386/sev: Extract build_kernel_loader_hashes + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [78/91] 291ea10e774178826d1afd38fc8292d67c5fd42d (bonzini/rhel-qemu-kvm) + +Extract the building of the kernel hashes table out from +sev_add_kernel_loader_hashes() to allow building it in +other memory areas (for SNP support). + +No functional change intended. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-22-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 06cbd66cecaa3230cccb330facac241a677b29d5) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 102 ++++++++++++++++++++++++++-------------------- + 1 file changed, 58 insertions(+), 44 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index abb63062ac..73f9406715 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1754,45 +1754,16 @@ static const QemuUUID sev_cmdline_entry_guid = { + 0x4d, 0x36, 0xab, 0x2a) + }; + +-/* +- * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page +- * which is included in SEV's initial memory measurement. +- */ +-bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ++static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, ++ SevKernelLoaderContext *ctx, ++ Error **errp) + { +- uint8_t *data; +- SevHashTableDescriptor *area; + SevHashTable *ht; +- PaddedSevHashTable *padded_ht; + uint8_t cmdline_hash[HASH_SIZE]; + uint8_t initrd_hash[HASH_SIZE]; + uint8_t kernel_hash[HASH_SIZE]; + uint8_t *hashp; + size_t hash_len = HASH_SIZE; +- hwaddr mapped_len = sizeof(*padded_ht); +- MemTxAttrs attrs = { 0 }; +- bool ret = true; +- SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- +- /* +- * Only add the kernel hashes if the sev-guest configuration explicitly +- * stated kernel-hashes=on. 
+- */ +- if (!sev_common->kernel_hashes) { +- return false; +- } +- +- if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { +- error_setg(errp, "SEV: kernel specified but guest firmware " +- "has no hashes table GUID"); +- return false; +- } +- area = (SevHashTableDescriptor *)data; +- if (!area->base || area->size < sizeof(PaddedSevHashTable)) { +- error_setg(errp, "SEV: guest firmware hashes table area is invalid " +- "(base=0x%x size=0x%x)", area->base, area->size); +- return false; +- } + + /* + * Calculate hash of kernel command-line with the terminating null byte. If +@@ -1829,16 +1800,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + } + assert(hash_len == HASH_SIZE); + +- /* +- * Populate the hashes table in the guest's memory at the OVMF-designated +- * area for the SEV hashes table +- */ +- padded_ht = address_space_map(&address_space_memory, area->base, +- &mapped_len, true, attrs); +- if (!padded_ht || mapped_len != sizeof(*padded_ht)) { +- error_setg(errp, "SEV: cannot map hashes table guest memory area"); +- return false; +- } + ht = &padded_ht->ht; + + ht->guid = sev_hash_table_header_guid; +@@ -1859,8 +1820,61 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, +- sizeof(*padded_ht), errp) < 0) { ++ return true; ++} ++ ++/* ++ * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page ++ * which is included in SEV's initial memory measurement. 
++ */ ++bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ++{ ++ uint8_t *data; ++ SevHashTableDescriptor *area; ++ PaddedSevHashTable *padded_ht; ++ hwaddr mapped_len = sizeof(*padded_ht); ++ MemTxAttrs attrs = { 0 }; ++ bool ret = true; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ /* ++ * Only add the kernel hashes if the sev-guest configuration explicitly ++ * stated kernel-hashes=on. ++ */ ++ if (!sev_common->kernel_hashes) { ++ return false; ++ } ++ ++ if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { ++ error_setg(errp, "SEV: kernel specified but guest firmware " ++ "has no hashes table GUID"); ++ return false; ++ } ++ ++ area = (SevHashTableDescriptor *)data; ++ if (!area->base || area->size < sizeof(PaddedSevHashTable)) { ++ error_setg(errp, "SEV: guest firmware hashes table area is invalid " ++ "(base=0x%x size=0x%x)", area->base, area->size); ++ return false; ++ } ++ ++ /* ++ * Populate the hashes table in the guest's memory at the OVMF-designated ++ * area for the SEV hashes table ++ */ ++ padded_ht = address_space_map(&address_space_memory, area->base, ++ &mapped_len, true, attrs); ++ if (!padded_ht || mapped_len != sizeof(*padded_ht)) { ++ error_setg(errp, "SEV: cannot map hashes table guest memory area"); ++ return false; ++ } ++ ++ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { + ret = false; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch b/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch new file mode 100644 index 0000000..1d30674 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch @@ -0,0 +1,65 @@ +From a9530c89225fce9e381929c4cd8e372068827acf Mon Sep 17 00:00:00 2001 +From: Michal Privoznik +Date: Mon, 24 Jun 
2024 10:52:49 +0200 +Subject: [PATCH 089/100] i386/sev: Fallback to the default SEV device if none + provided in sev_get_capabilities() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [89/91] 22318c20d7102815f754cec0efaf383e05ef79c1 (bonzini/rhel-qemu-kvm) + +When management tools (e.g. libvirt) query QEMU capabilities, +they start QEMU with a minimalistic configuration and issue +various commands on monitor. One of the command issued is/might +be "query-sev-capabilities" to learn values like cbitpos or +reduced-phys-bits. But as of v9.0.0-1145-g16dcf200dc the monitor +command returns an error instead. + +This creates a chicken-egg problem because in order to query +those aforementioned values QEMU needs to be started with a +'sev-guest' object. But to start QEMU with the values must be +known. + +I think it's safe to assume that the default path ("/dev/sev") +provides the same data as user provided one. So fall back to it. 
+ +Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 +Signed-off-by: Michal Privoznik +Link: https://lore.kernel.org/r/157f93712c23818be193ce785f648f0060b33dee.1719218926.git.mprivozn@redhat.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3fb24530b2bb1346a44e17becefc9865b40a2257) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 53b7f7315b..491fab74fd 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -585,13 +585,13 @@ static SevCapability *sev_get_capabilities(Error **errp) + } + + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- if (!sev_common) { +- error_setg(errp, "SEV is not configured"); +- return NULL; ++ if (sev_common) { ++ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", ++ &error_abort); ++ } else { ++ sev_device = g_strdup(DEFAULT_SEV_DEVICE); + } + +- sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", +- &error_abort); + fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch b/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch new file mode 100644 index 0000000..b23e008 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch @@ -0,0 +1,48 @@ +From b672cdf8c10a530b5bcf6dd4489632891eb2c731 Mon Sep 17 00:00:00 2001 +From: Michal Privoznik +Date: Mon, 24 Jun 2024 10:52:48 +0200 +Subject: [PATCH 088/100] i386/sev: Fix error message in sev_get_capabilities() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [88/91] 
ff8a8b27af02e565172ffe39d0571c234317713d (bonzini/rhel-qemu-kvm) + +When a custom path is provided to sev-guest object and opening +the path fails an error message is reported. But the error +message still mentions DEFAULT_SEV_DEVICE ("/dev/sev") instead of +the custom path. + +Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 +Signed-off-by: Michal Privoznik +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/b4648905d399780063dc70851d3d6a3cd28719a5.1719218926.git.mprivozn@redhat.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit e306ae87e0ef04bc7a5dec6db693f6ea09d64d45) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 37de80adc7..53b7f7315b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -595,7 +595,7 @@ static SevCapability *sev_get_capabilities(Error **errp) + fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", +- DEFAULT_SEV_DEVICE); ++ sev_device); + g_free(sev_device); + return NULL; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch b/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch new file mode 100644 index 0000000..2d167af --- /dev/null +++ b/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch @@ -0,0 +1,1118 @@ +From e6cf2115eb9db545821180b8a978cdccc6a2c2db Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:16 -0500 +Subject: [PATCH 056/100] i386/sev: Introduce "sev-common" type to encapsulate + common SEV state + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [56/91] b52d5c9c5e4997d2fd791fa36dd5d4c836dfc32f (bonzini/rhel-qemu-kvm) + +Currently all SEV/SEV-ES functionality is managed through a 
single +'sev-guest' QOM type. With upcoming support for SEV-SNP, taking this +same approach won't work well since some of the properties/state +managed by 'sev-guest' is not applicable to SEV-SNP, which will instead +rely on a new QOM type with its own set of properties/state. + +To prepare for this, this patch moves common state into an abstract +'sev-common' parent type to encapsulate properties/state that are +common to both SEV/SEV-ES and SEV-SNP, leaving only SEV/SEV-ES-specific +properties/state in the current 'sev-guest' type. This should not +affect current behavior or command-line options. + +As part of this patch, some related changes are also made: + + - a static 'sev_guest' variable is currently used to keep track of + the 'sev-guest' instance. SEV-SNP would similarly introduce an + 'sev_snp_guest' static variable. But these instances are now + available via qdev_get_machine()->cgs, so switch to using that + instead and drop the static variable. + + - 'sev_guest' is currently used as the name for the static variable + holding a pointer to the 'sev-guest' instance. Re-purpose the name + as a local variable referring the 'sev-guest' instance, and use + that consistently throughout the code so it can be easily + distinguished from sev-common/sev-snp-guest instances. + + - 'sev' is generally used as the name for local variables holding a + pointer to the 'sev-guest' instance. In cases where that now points + to common state, use the name 'sev_common'; in cases where that now + points to state specific to 'sev-guest' instance, use the name + 'sev_guest' + +In order to enable kernel-hashes for SNP, pull it from +SevGuestProperties to its parent SevCommonProperties so +it will be available for both SEV and SNP. 
+ +Signed-off-by: Michael Roth +Co-developed-by: Dov Murik +Signed-off-by: Dov Murik +Acked-by: Markus Armbruster (QAPI schema) +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-5-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 16dcf200dc951c1cde3e5b442457db5f690b8cf0) +Signed-off-by: Paolo Bonzini +--- + qapi/qom.json | 40 ++-- + target/i386/sev.c | 489 ++++++++++++++++++++++++++-------------------- + target/i386/sev.h | 3 + + 3 files changed, 301 insertions(+), 231 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 38dde6d785..056b38f491 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -875,20 +875,12 @@ + 'data': { '*filename': 'str' } } + + ## +-# @SevGuestProperties: ++# @SevCommonProperties: + # +-# Properties for sev-guest objects. ++# Properties common to objects that are derivatives of sev-common. + # + # @sev-device: SEV device to use (default: "/dev/sev") + # +-# @dh-cert-file: guest owners DH certificate (encoded with base64) +-# +-# @session-file: guest owners session parameters (encoded with base64) +-# +-# @policy: SEV policy value (default: 0x1) +-# +-# @handle: SEV firmware handle (default: 0) +-# + # @cbitpos: C-bit location in page table entry (default: 0) + # + # @reduced-phys-bits: number of bits in physical addresses that become +@@ -898,6 +890,27 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# Since: 9.1 ++## ++{ 'struct': 'SevCommonProperties', ++ 'data': { '*sev-device': 'str', ++ '*cbitpos': 'uint32', ++ 'reduced-phys-bits': 'uint32', ++ '*kernel-hashes': 'bool' } } ++ ++## ++# @SevGuestProperties: ++# ++# Properties for sev-guest objects. 
++# ++# @dh-cert-file: guest owners DH certificate (encoded with base64) ++# ++# @session-file: guest owners session parameters (encoded with base64) ++# ++# @policy: SEV policy value (default: 0x1) ++# ++# @handle: SEV firmware handle (default: 0) ++# + # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. + # The newer KVM_SEV_INIT2 interface syncs additional vCPU + # state when initializing the VMSA structures, which will +@@ -909,14 +922,11 @@ + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +- 'data': { '*sev-device': 'str', +- '*dh-cert-file': 'str', ++ 'base': 'SevCommonProperties', ++ 'data': { '*dh-cert-file': 'str', + '*session-file': 'str', + '*policy': 'uint32', + '*handle': 'uint32', +- '*cbitpos': 'uint32', +- 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool', + '*legacy-vm-type': 'bool' } } + + ## +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 67ed32e5ea..33e606eea0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -40,49 +40,59 @@ + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" + +-#define TYPE_SEV_GUEST "sev-guest" +-OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) ++OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) ++OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) + +- +-/** +- * SevGuestState: +- * +- * The SevGuestState object is used for creating and managing a SEV +- * guest. 
+- * +- * # $QEMU \ +- * -object sev-guest,id=sev0 \ +- * -machine ...,memory-encryption=sev0 +- */ +-struct SevGuestState { ++struct SevCommonState { + X86ConfidentialGuest parent_obj; + + int kvm_type; + + /* configuration parameters */ + char *sev_device; +- uint32_t policy; +- char *dh_cert_file; +- char *session_file; + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; +- bool legacy_vm_type; + + /* runtime state */ +- uint32_t handle; + uint8_t api_major; + uint8_t api_minor; + uint8_t build_id; + int sev_fd; + SevState state; +- gchar *measurement; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; + }; + ++struct SevCommonStateClass { ++ X86ConfidentialGuestClass parent_class; ++ ++}; ++ ++/** ++ * SevGuestState: ++ * ++ * The SevGuestState object is used for creating and managing a SEV ++ * guest. ++ * ++ * # $QEMU \ ++ * -object sev-guest,id=sev0 \ ++ * -machine ...,memory-encryption=sev0 ++ */ ++struct SevGuestState { ++ SevCommonState parent_obj; ++ gchar *measurement; ++ ++ /* configuration parameters */ ++ uint32_t handle; ++ uint32_t policy; ++ char *dh_cert_file; ++ char *session_file; ++ bool legacy_vm_type; ++}; ++ + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ + #define DEFAULT_SEV_DEVICE "/dev/sev" + +@@ -128,7 +138,6 @@ typedef struct QEMU_PACKED PaddedSevHashTable { + + QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + +-static SevGuestState *sev_guest; + static Error *sev_mig_blocker; + + static const char *const sev_fw_errlist[] = { +@@ -209,21 +218,21 @@ fw_error_to_str(int code) + } + + static bool +-sev_check_state(const SevGuestState *sev, SevState state) ++sev_check_state(const SevCommonState *sev_common, SevState state) + { +- assert(sev); +- return sev->state == state ? true : false; ++ assert(sev_common); ++ return sev_common->state == state ? 
true : false; + } + + static void +-sev_set_guest_state(SevGuestState *sev, SevState new_state) ++sev_set_guest_state(SevCommonState *sev_common, SevState new_state) + { + assert(new_state < SEV_STATE__MAX); +- assert(sev); ++ assert(sev_common); + +- trace_kvm_sev_change_state(SevState_str(sev->state), ++ trace_kvm_sev_change_state(SevState_str(sev_common->state), + SevState_str(new_state)); +- sev->state = new_state; ++ sev_common->state = new_state; + } + + static void +@@ -290,121 +299,61 @@ static struct RAMBlockNotifier sev_ram_notifier = { + .ram_block_removed = sev_ram_block_removed, + }; + +-static void +-sev_guest_finalize(Object *obj) +-{ +-} +- +-static char * +-sev_guest_get_session_file(Object *obj, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- return s->session_file ? g_strdup(s->session_file) : NULL; +-} +- +-static void +-sev_guest_set_session_file(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- s->session_file = g_strdup(value); +-} +- +-static char * +-sev_guest_get_dh_cert_file(Object *obj, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- return g_strdup(s->dh_cert_file); +-} +- +-static void +-sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- s->dh_cert_file = g_strdup(value); +-} +- +-static char * +-sev_guest_get_sev_device(Object *obj, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- return g_strdup(sev->sev_device); +-} +- +-static void +-sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->sev_device = g_strdup(value); +-} +- +-static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- return sev->kernel_hashes; +-} +- +-static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- 
sev->kernel_hashes = value; +-} +- +-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +-{ +- return SEV_GUEST(obj)->legacy_vm_type; +-} +- +-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +-{ +- SEV_GUEST(obj)->legacy_vm_type = value; +-} +- + bool + sev_enabled(void) + { +- return !!sev_guest; ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); + } + + bool + sev_es_enabled(void) + { +- return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + } + + uint32_t + sev_get_cbit_position(void) + { +- return sev_guest ? sev_guest->cbitpos : 0; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ return sev_common ? sev_common->cbitpos : 0; + } + + uint32_t + sev_get_reduced_phys_bits(void) + { +- return sev_guest ? sev_guest->reduced_phys_bits : 0; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ return sev_common ? 
sev_common->reduced_phys_bits : 0; + } + + static SevInfo *sev_get_info(void) + { + SevInfo *info; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevGuestState *sev_guest = ++ (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), ++ TYPE_SEV_GUEST); + + info = g_new0(SevInfo, 1); + info->enabled = sev_enabled(); + + if (info->enabled) { +- info->api_major = sev_guest->api_major; +- info->api_minor = sev_guest->api_minor; +- info->build_id = sev_guest->build_id; +- info->policy = sev_guest->policy; +- info->state = sev_guest->state; +- info->handle = sev_guest->handle; ++ if (sev_guest) { ++ info->handle = sev_guest->handle; ++ } ++ info->api_major = sev_common->api_major; ++ info->api_minor = sev_common->api_minor; ++ info->build_id = sev_common->build_id; ++ info->state = sev_common->state; ++ /* we only report the lower 32-bits of policy for SNP, ok for now... */ ++ info->policy = ++ (uint32_t)object_property_get_uint(OBJECT(sev_common), ++ "policy", NULL); + } + + return info; +@@ -530,6 +479,8 @@ static SevCapability *sev_get_capabilities(Error **errp) + size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0; + uint32_t ebx; + int fd; ++ SevCommonState *sev_common; ++ char *sev_device; + + if (!kvm_enabled()) { + error_setg(errp, "KVM not enabled"); +@@ -540,12 +491,21 @@ static SevCapability *sev_get_capabilities(Error **errp) + return NULL; + } + +- fd = open(DEFAULT_SEV_DEVICE, O_RDWR); ++ sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ if (!sev_common) { ++ error_setg(errp, "SEV is not configured"); ++ } ++ ++ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", ++ &error_abort); ++ fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", + DEFAULT_SEV_DEVICE); ++ g_free(sev_device); + return NULL; + } ++ g_free(sev_device); + + if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, + &cert_chain_data, &cert_chain_len, errp)) { +@@ -588,7 +548,7 @@ 
static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + { + struct kvm_sev_attestation_report input = {}; + SevAttestationReport *report = NULL; +- SevGuestState *sev = sev_guest; ++ SevCommonState *sev_common; + g_autofree guchar *data = NULL; + g_autofree guchar *buf = NULL; + gsize len; +@@ -613,8 +573,10 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + return NULL; + } + ++ sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ + /* Query the report length */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret < 0) { + if (err != SEV_RET_INVALID_LEN) { +@@ -630,7 +592,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + memcpy(input.mnonce, buf, sizeof(input.mnonce)); + + /* Query the report */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret) { + error_setg_errno(errp, errno, "SEV: Failed to get attestation report" +@@ -670,26 +632,27 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + } + + static int +-sev_launch_start(SevGuestState *sev) ++sev_launch_start(SevGuestState *sev_guest) + { + gsize sz; + int ret = 1; + int fw_error, rc; + struct kvm_sev_launch_start start = { +- .handle = sev->handle, .policy = sev->policy ++ .handle = sev_guest->handle, .policy = sev_guest->policy + }; + guchar *session = NULL, *dh_cert = NULL; ++ SevCommonState *sev_common = SEV_COMMON(sev_guest); + +- if (sev->session_file) { +- if (sev_read_file_base64(sev->session_file, &session, &sz) < 0) { ++ if (sev_guest->session_file) { ++ if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { + goto out; + } + start.session_uaddr = (unsigned long)session; + start.session_len = sz; + } + +- if (sev->dh_cert_file) { +- if 
(sev_read_file_base64(sev->dh_cert_file, &dh_cert, &sz) < 0) { ++ if (sev_guest->dh_cert_file) { ++ if (sev_read_file_base64(sev_guest->dh_cert_file, &dh_cert, &sz) < 0) { + goto out; + } + start.dh_uaddr = (unsigned long)dh_cert; +@@ -697,15 +660,15 @@ sev_launch_start(SevGuestState *sev) + } + + trace_kvm_sev_launch_start(start.policy, session, dh_cert); +- rc = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); ++ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); + if (rc < 0) { + error_report("%s: LAUNCH_START ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); + goto out; + } + +- sev_set_guest_state(sev, SEV_STATE_LAUNCH_UPDATE); +- sev->handle = start.handle; ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); ++ sev_guest->handle = start.handle; + ret = 0; + + out: +@@ -715,7 +678,7 @@ out: + } + + static int +-sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) ++sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + { + int ret, fw_error; + struct kvm_sev_launch_update_data update; +@@ -727,7 +690,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) + update.uaddr = (uintptr_t)addr; + update.len = len; + trace_kvm_sev_launch_update_data(addr, len); +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + &update, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", +@@ -738,11 +701,12 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) + } + + static int +-sev_launch_update_vmsa(SevGuestState *sev) ++sev_launch_update_vmsa(SevGuestState *sev_guest) + { + int ret, fw_error; + +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, ++ NULL, &fw_error); + if (ret) { + 
error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +@@ -754,18 +718,19 @@ sev_launch_update_vmsa(SevGuestState *sev) + static void + sev_launch_get_measure(Notifier *notifier, void *unused) + { +- SevGuestState *sev = sev_guest; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + int ret, error; + g_autofree guchar *data = NULL; + struct kvm_sev_launch_measure measurement = {}; + +- if (!sev_check_state(sev, SEV_STATE_LAUNCH_UPDATE)) { ++ if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { + return; + } + + if (sev_es_enabled()) { + /* measure all the VM save areas before getting launch_measure */ +- ret = sev_launch_update_vmsa(sev); ++ ret = sev_launch_update_vmsa(sev_guest); + if (ret) { + exit(1); + } +@@ -773,7 +738,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + } + + /* query the measurement blob length */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (!measurement.len) { + error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", +@@ -785,7 +750,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + measurement.uaddr = (unsigned long)data; + + /* get the measurement blob */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (ret) { + error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", +@@ -793,17 +758,19 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + return; + } + +- sev_set_guest_state(sev, SEV_STATE_LAUNCH_SECRET); ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_SECRET); + + /* encode the measurement value and emit the event */ +- sev->measurement = g_base64_encode(data, measurement.len); +- trace_kvm_sev_launch_measurement(sev->measurement); ++ 
sev_guest->measurement = g_base64_encode(data, measurement.len); ++ trace_kvm_sev_launch_measurement(sev_guest->measurement); + } + + static char *sev_get_launch_measurement(void) + { ++ SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); ++ + if (sev_guest && +- sev_guest->state >= SEV_STATE_LAUNCH_SECRET) { ++ SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { + return g_strdup(sev_guest->measurement); + } + +@@ -832,19 +799,20 @@ static Notifier sev_machine_done_notify = { + }; + + static void +-sev_launch_finish(SevGuestState *sev) ++sev_launch_finish(SevGuestState *sev_guest) + { + int ret, error; + + trace_kvm_sev_launch_finish(); +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, ++ &error); + if (ret) { + error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", + __func__, ret, error, fw_error_to_str(error)); + exit(1); + } + +- sev_set_guest_state(sev, SEV_STATE_RUNNING); ++ sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, +@@ -855,38 +823,40 @@ sev_launch_finish(SevGuestState *sev) + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { +- SevGuestState *sev = opaque; ++ SevCommonState *sev_common = opaque; + + if (running) { +- if (!sev_check_state(sev, SEV_STATE_RUNNING)) { +- sev_launch_finish(sev); ++ if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { ++ sev_launch_finish(SEV_GUEST(sev_common)); + } + } + } + + static int sev_kvm_type(X86ConfidentialGuest *cg) + { +- SevGuestState *sev = SEV_GUEST(cg); ++ SevCommonState *sev_common = SEV_COMMON(cg); ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + int kvm_type; + +- if (sev->kvm_type != -1) { ++ if (sev_common->kvm_type != -1) { + goto out; + } + +- kvm_type = (sev->policy & SEV_POLICY_ES) ? 
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { +- sev->kvm_type = kvm_type; ++ kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? ++ KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; ++ if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { ++ sev_common->kvm_type = kvm_type; + } else { +- sev->kvm_type = KVM_X86_DEFAULT_VM; ++ sev_common->kvm_type = KVM_X86_DEFAULT_VM; + } + + out: +- return sev->kvm_type; ++ return sev_common->kvm_type; + } + + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevGuestState *sev = SEV_GUEST(cgs); ++ SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; +@@ -899,8 +869,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return -1; + } + +- sev_guest = sev; +- sev->state = SEV_STATE_UNINIT; ++ sev_common->state = SEV_STATE_UNINIT; + + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); + host_cbitpos = ebx & 0x3f; +@@ -910,9 +879,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + * register of CPUID 0x8000001F. No need to verify the range as the + * comparison against the host value accomplishes that. + */ +- if (host_cbitpos != sev->cbitpos) { ++ if (host_cbitpos != sev_common->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", +- __func__, host_cbitpos, sev->cbitpos); ++ __func__, host_cbitpos, sev_common->cbitpos); + goto err; + } + +@@ -921,16 +890,17 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + * the EBX register of CPUID 0x8000001F, so verify the supplied value + * is in the range of 1 to 63. 
+ */ +- if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { ++ if (sev_common->reduced_phys_bits < 1 || ++ sev_common->reduced_phys_bits > 63) { + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, requested '%d'", +- __func__, sev->reduced_phys_bits); ++ __func__, sev_common->reduced_phys_bits); + goto err; + } + +- devname = object_property_get_str(OBJECT(sev), "sev-device", NULL); +- sev->sev_fd = open(devname, O_RDWR); +- if (sev->sev_fd < 0) { ++ devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); ++ sev_common->sev_fd = open(devname, O_RDWR); ++ if (sev_common->sev_fd < 0) { + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); +@@ -938,7 +908,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + g_free(devname); + +- ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status, ++ ret = sev_platform_ioctl(sev_common->sev_fd, SEV_PLATFORM_STATUS, &status, + &fw_error); + if (ret) { + error_setg(errp, "%s: failed to get platform status ret=%d " +@@ -946,9 +916,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + fw_error_to_str(fw_error)); + goto err; + } +- sev->build_id = status.build; +- sev->api_major = status.api_major; +- sev->api_minor = status.api_minor; ++ sev_common->build_id = status.build; ++ sev_common->api_major = status.api_major; ++ sev_common->api_minor = status.api_minor; + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { +@@ -966,14 +936,14 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { ++ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; + +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); + } else { + struct kvm_sev_init args = { 0 }; + +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + } + + if (ret) { +@@ -982,7 +952,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- ret = sev_launch_start(sev); ++ sev_launch_start(SEV_GUEST(sev_common)); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; +@@ -990,13 +960,12 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + + ram_block_notifier_add(&sev_ram_notifier); + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); +- qemu_add_vm_change_state_handler(sev_vm_state_change, sev); ++ qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + cgs->ready = true; + + return 0; + err: +- sev_guest = NULL; + ram_block_discard_disable(false); + return -1; + } +@@ -1004,13 +973,15 @@ err: + int + sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + { +- if (!sev_guest) { ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ if (!sev_common) { + return 0; + } + + /* if SEV is in update state then encrypt the data else do nothing */ +- if (sev_check_state(sev_guest, SEV_STATE_LAUNCH_UPDATE)) { +- int ret = sev_launch_update_data(sev_guest, ptr, len); ++ if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { ++ int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; +@@ -1030,16 +1001,17 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + void *hva; + gsize hdr_sz = 0, data_sz = 0; + MemoryRegion *mr = NULL; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); 
+ +- if (!sev_guest) { ++ if (!sev_common) { + error_setg(errp, "SEV not enabled for guest"); + return 1; + } + + /* secret can be injected only in this state */ +- if (!sev_check_state(sev_guest, SEV_STATE_LAUNCH_SECRET)) { ++ if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_SECRET)) { + error_setg(errp, "SEV: Not in correct state. (LSECRET) %x", +- sev_guest->state); ++ sev_common->state); + return 1; + } + +@@ -1073,7 +1045,7 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, + input.trans_uaddr, input.trans_len); + +- ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_LAUNCH_SECRET, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_SECRET, + &input, &error); + if (ret) { + error_setg(errp, "SEV: failed to inject secret ret=%d fw_error=%d '%s'", +@@ -1180,9 +1152,10 @@ void sev_es_set_reset_vector(CPUState *cpu) + { + X86CPU *x86; + CPUX86State *env; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* Only update if we have valid reset information */ +- if (!sev_guest || !sev_guest->reset_data_valid) { ++ if (!sev_common || !sev_common->reset_data_valid) { + return; + } + +@@ -1194,11 +1167,11 @@ void sev_es_set_reset_vector(CPUState *cpu) + x86 = X86_CPU(cpu); + env = &x86->env; + +- cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, ++ cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | + DESC_R_MASK | DESC_A_MASK); + +- env->eip = sev_guest->reset_ip; ++ env->eip = sev_common->reset_ip; + } + + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) +@@ -1206,6 +1179,7 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) + CPUState *cpu; + uint32_t addr; + int ret; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + if (!sev_es_enabled()) { + return 0; +@@ -1219,9 +1193,9 @@ int sev_es_save_reset_vector(void 
*flash_ptr, uint64_t flash_size) + } + + if (addr) { +- sev_guest->reset_cs = addr & 0xffff0000; +- sev_guest->reset_ip = addr & 0x0000ffff; +- sev_guest->reset_data_valid = true; ++ sev_common->reset_cs = addr & 0xffff0000; ++ sev_common->reset_ip = addr & 0x0000ffff; ++ sev_common->reset_data_valid = true; + + CPU_FOREACH(cpu) { + sev_es_set_reset_vector(cpu); +@@ -1267,12 +1241,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ +- if (!sev_guest->kernel_hashes) { ++ if (!sev_common->kernel_hashes) { + return false; + } + +@@ -1363,8 +1338,30 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static char * ++sev_common_get_sev_device(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_COMMON(obj)->sev_device); ++} ++ + static void +-sev_guest_class_init(ObjectClass *oc, void *data) ++sev_common_set_sev_device(Object *obj, const char *value, Error **errp) ++{ ++ SEV_COMMON(obj)->sev_device = g_strdup(value); ++} ++ ++static bool sev_common_get_kernel_hashes(Object *obj, Error **errp) ++{ ++ return SEV_COMMON(obj)->kernel_hashes; ++} ++ ++static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) ++{ ++ SEV_COMMON(obj)->kernel_hashes = value; ++} ++ ++static void ++sev_common_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); +@@ -1373,10 +1370,87 @@ sev_guest_class_init(ObjectClass *oc, void *data) + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", +- sev_guest_get_sev_device, +- 
sev_guest_set_sev_device); ++ sev_common_get_sev_device, ++ sev_common_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); ++ object_class_property_add_bool(oc, "kernel-hashes", ++ sev_common_get_kernel_hashes, ++ sev_common_set_kernel_hashes); ++ object_class_property_set_description(oc, "kernel-hashes", ++ "add kernel hashes to guest firmware for measured Linux boot"); ++} ++ ++static void ++sev_common_instance_init(Object *obj) ++{ ++ SevCommonState *sev_common = SEV_COMMON(obj); ++ ++ sev_common->kvm_type = -1; ++ ++ sev_common->sev_device = g_strdup(DEFAULT_SEV_DEVICE); ++ ++ object_property_add_uint32_ptr(obj, "cbitpos", &sev_common->cbitpos, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "reduced-phys-bits", ++ &sev_common->reduced_phys_bits, ++ OBJ_PROP_FLAG_READWRITE); ++} ++ ++/* sev guest info common to sev/sev-es/sev-snp */ ++static const TypeInfo sev_common_info = { ++ .parent = TYPE_X86_CONFIDENTIAL_GUEST, ++ .name = TYPE_SEV_COMMON, ++ .instance_size = sizeof(SevCommonState), ++ .instance_init = sev_common_instance_init, ++ .class_size = sizeof(SevCommonStateClass), ++ .class_init = sev_common_class_init, ++ .abstract = true, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static char * ++sev_guest_get_dh_cert_file(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_GUEST(obj)->dh_cert_file); ++} ++ ++static void ++sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) ++{ ++ SEV_GUEST(obj)->dh_cert_file = g_strdup(value); ++} ++ ++static char * ++sev_guest_get_session_file(Object *obj, Error **errp) ++{ ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ ++ return sev_guest->session_file ? 
g_strdup(sev_guest->session_file) : NULL; ++} ++ ++static void ++sev_guest_set_session_file(Object *obj, const char *value, Error **errp) ++{ ++ SEV_GUEST(obj)->session_file = g_strdup(value); ++} ++ ++static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++{ ++ return SEV_GUEST(obj)->legacy_vm_type; ++} ++ ++static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++{ ++ SEV_GUEST(obj)->legacy_vm_type = value; ++} ++ ++static void ++sev_guest_class_init(ObjectClass *oc, void *data) ++{ + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); +@@ -1387,11 +1461,6 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "kernel-hashes", +- sev_guest_get_kernel_hashes, +- sev_guest_set_kernel_hashes); +- object_class_property_set_description(oc, "kernel-hashes", +- "add kernel hashes to guest firmware for measured Linux boot"); + object_class_property_add_bool(oc, "legacy-vm-type", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type); +@@ -1402,41 +1471,29 @@ sev_guest_class_init(ObjectClass *oc, void *data) + static void + sev_guest_instance_init(Object *obj) + { +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->kvm_type = -1; ++ SevGuestState *sev_guest = SEV_GUEST(obj); + +- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); +- sev->policy = DEFAULT_GUEST_POLICY; +- object_property_add_uint32_ptr(obj, "policy", &sev->policy, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "handle", &sev->handle, ++ sev_guest->policy = DEFAULT_GUEST_POLICY; ++ object_property_add_uint32_ptr(obj, "handle", &sev_guest->handle, + OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, +- OBJ_PROP_FLAG_READWRITE); +- 
object_property_add_uint32_ptr(obj, "reduced-phys-bits", +- &sev->reduced_phys_bits, ++ object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); + } + +-/* sev guest info */ ++/* guest info specific sev/sev-es */ + static const TypeInfo sev_guest_info = { +- .parent = TYPE_X86_CONFIDENTIAL_GUEST, ++ .parent = TYPE_SEV_COMMON, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), +- .instance_finalize = sev_guest_finalize, +- .class_init = sev_guest_class_init, + .instance_init = sev_guest_instance_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } +- } ++ .class_init = sev_guest_class_init, + }; + + static void + sev_register_types(void) + { ++ type_register_static(&sev_common_info); + type_register_static(&sev_guest_info); + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 9e10d09539..668374eef3 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -20,6 +20,9 @@ + + #include "exec/confidential-guest-support.h" + ++#define TYPE_SEV_COMMON "sev-common" ++#define TYPE_SEV_GUEST "sev-guest" ++ + #define SEV_POLICY_NODBG 0x1 + #define SEV_POLICY_NOKS 0x2 + #define SEV_POLICY_ES 0x4 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch b/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch new file mode 100644 index 0000000..b347bf6 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch @@ -0,0 +1,530 @@ +From 900859fd3445b9a71f1a9a8befda17f0c33f3923 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:19 -0500 +Subject: [PATCH 059/100] i386/sev: Introduce 'sev-snp-guest' object + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [59/91] 3e585113d209176c2b97ad5e4fe943f19dfdcaeb (bonzini/rhel-qemu-kvm) + +SEV-SNP support relies on 
a different set of properties/state than the +existing 'sev-guest' object. This patch introduces the 'sev-snp-guest' +object, which can be used to configure an SEV-SNP guest. For example, +a default-configured SEV-SNP guest with no additional information +passed in for use with attestation: + + -object sev-snp-guest,id=sev0 + +or a fully-specified SEV-SNP guest where all spec-defined binary +blobs are passed in as base64-encoded strings: + + -object sev-snp-guest,id=sev0, \ + policy=0x30000, \ + init-flags=0, \ + id-block=YWFhYWFhYWFhYWFhYWFhCg==, \ + id-auth=CxHK/OKLkXGn/KpAC7Wl1FSiisWDbGTEKz..., \ + author-key-enabled=on, \ + host-data=LNkCWBRC5CcdGXirbNUV1OrsR28s..., \ + guest-visible-workarounds=AA==, \ + +See the QAPI schema updates included in this patch for more usage +details. + +In some cases these blobs may be up to 4096 characters, but this is +generally well below the default limit for linux hosts where +command-line sizes are defined by the sysconf-configurable ARG_MAX +value, which defaults to 2097152 characters for Ubuntu hosts, for +example. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Michael Roth +Acked-by: Markus Armbruster (for QAPI schema) +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-8-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7b34df44260b391e33bc3acf1ced30019d9aadf1) +Signed-off-by: Paolo Bonzini +--- + docs/system/i386/amd-memory-encryption.rst | 70 +++++- + qapi/qom.json | 58 +++++ + target/i386/sev.c | 253 +++++++++++++++++++++ + target/i386/sev.h | 1 + + 4 files changed, 380 insertions(+), 2 deletions(-) + +diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst +index e9bc142bc1..748f5094ba 100644 +--- a/docs/system/i386/amd-memory-encryption.rst ++++ b/docs/system/i386/amd-memory-encryption.rst +@@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs + are about to occur. This allows the guest to selectively share information with + the hypervisor to satisfy the requested function. + +-Launching +---------- ++Launching (SEV and SEV-ES) ++-------------------------- + + Boot images (such as bios) must be encrypted before a guest can be booted. The + ``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``, +@@ -161,6 +161,72 @@ The value of GCTX.LD is + If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for + ``kernel_hashes_blob`` and ``vmsas_blob`` as needed. + ++Launching (SEV-SNP) ++------------------- ++Boot images (such as bios) must be encrypted before a guest can be booted. The ++``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ++``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These ++three commands communicate with SEV-SNP firmware to generate a fresh memory ++encryption key for the VM, encrypt the boot images for a successful launch. 
For ++more details on the SEV-SNP firmware interfaces used by these commands please ++see the SEV-SNP Firmware ABI. ++ ++``SNP_LAUNCH_START`` is called first to create a cryptographic launch context ++within the firmware. To create this context, the guest owner must provide a ++guest policy and other parameters as described in the SEV-SNP firmware ++specification. The launch parameters should be specified as described in the ++QAPI schema for the sev-snp-guest object. ++ ++The ``SNP_LAUNCH_START`` uses the following parameters, which can be configured ++by the corresponding parameters documented in the QAPI schema for the ++'sev-snp-guest' object. ++ +++--------+-------+----------+-------------------------------------------------+ ++| key | type | default | meaning | +++---------------------------+-------------------------------------------------+ ++| policy | hex | 0x30000 | a 64-bit guest policy | +++---------------------------+-------------------------------------------------+ ++| guest-visible-workarounds | string| 0 | 16-byte base64 encoded string| ++| | | | for guest OS visible | ++| | | | workarounds. | +++---------------------------+-------------------------------------------------+ ++ ++``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic context ++created via the ``SNP_LAUNCH_START`` command. If required, this command can be ++called multiple times to encrypt different memory regions. The command also ++calculates the measurement of the memory contents as it encrypts. ++ ++``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while ++finalizing the launch the firmware can perform checks on the launch digest ++computing through the ``SNP_LAUNCH_UPDATE``. To perform the check the user must ++supply the id block, authentication blob and host data that should be included ++in the attestation report. See the SEV-SNP spec for further details. 
++ ++The ``SNP_LAUNCH_FINISH`` uses the following parameters, which can be configured ++by the corresponding parameters documented in the QAPI schema for the ++'sev-snp-guest' object. ++ +++--------------------+-------+----------+-------------------------------------+ ++| key | type | default | meaning | +++--------------------+-------+----------+-------------------------------------+ ++| id-block | string| none | base64 encoded ID block | +++--------------------+-------+----------+-------------------------------------+ ++| id-auth | string| none | base64 encoded authentication | ++| | | | information | +++--------------------+-------+----------+-------------------------------------+ ++| author-key-enabled | bool | 0 | auth block contains author key | +++--------------------+-------+----------+-------------------------------------+ ++| host_data | string| none | host provided data | +++--------------------+-------+----------+-------------------------------------+ ++ ++To launch a SEV-SNP guest (additional parameters are documented in the QAPI ++schema for the 'sev-snp-guest' object):: ++ ++ # ${QEMU} \ ++ -machine ...,confidential-guest-support=sev0 \ ++ -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1 ++ ++ + Debugging + --------- + +diff --git a/qapi/qom.json b/qapi/qom.json +index 056b38f491..8bd299265e 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -929,6 +929,62 @@ + '*handle': 'uint32', + '*legacy-vm-type': 'bool' } } + ++## ++# @SevSnpGuestProperties: ++# ++# Properties for sev-snp-guest objects. Most of these are direct ++# arguments for the KVM_SNP_* interfaces documented in the Linux ++# kernel source under ++# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn ++# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands ++# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9). ++# ++# More usage information is also available in the QEMU source tree ++# under docs/amd-memory-encryption. 
++# ++# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as ++# defined in the SEV-SNP firmware ABI (default: 0x30000) ++# ++# @guest-visible-workarounds: 16-byte, base64-encoded blob to report ++# hypervisor-defined workarounds, corresponding to the 'GOSVW' ++# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP ++# firmware ABI (default: all-zero) ++# ++# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block' ++# structure for the SNP_LAUNCH_FINISH command defined in the ++# SEV-SNP firmware ABI (default: all-zero) ++# ++# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID ++# Authentication Information Structure' for the SNP_LAUNCH_FINISH ++# command defined in the SEV-SNP firmware ABI (default: all-zero) ++# ++# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY' ++# field defined in the SEV-SNP firmware ABI (default: false) ++# ++# @host-data: 32-byte, base64-encoded, user-defined blob to provide to ++# the guest, as documented for the 'HOST_DATA' parameter of the ++# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default: ++# all-zero) ++# ++# @vcek-disabled: Guests are by default allowed to choose between VLEK ++# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip ++# Endorsement Key) when requesting attestation reports from ++# firmware. Set this to true to disable the use of VCEK. 
++# (default: false) (since: 9.1) ++# ++# Since: 9.1 ++## ++{ 'struct': 'SevSnpGuestProperties', ++ 'base': 'SevCommonProperties', ++ 'data': { ++ '*policy': 'uint64', ++ '*guest-visible-workarounds': 'str', ++ '*id-block': 'str', ++ '*id-auth': 'str', ++ '*author-key-enabled': 'bool', ++ '*host-data': 'str', ++ '*vcek-disabled': 'bool' } } ++ + ## + # @ThreadContextProperties: + # +@@ -1007,6 +1063,7 @@ + { 'name': 'secret_keyring', + 'if': 'CONFIG_SECRET_KEYRING' }, + 'sev-guest', ++ 'sev-snp-guest', + 'thread-context', + 's390-pv-guest', + 'throttle-group', +@@ -1077,6 +1134,7 @@ + 'secret_keyring': { 'type': 'SecretKeyringProperties', + 'if': 'CONFIG_SECRET_KEYRING' }, + 'sev-guest': 'SevGuestProperties', ++ 'sev-snp-guest': 'SevSnpGuestProperties', + 'thread-context': 'ThreadContextProperties', + 'throttle-group': 'ThrottleGroupProperties', + 'tls-creds-anon': 'TlsCredsAnonProperties', +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 28a018ed83..a81b3228d4 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -42,6 +42,7 @@ + + OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) ++OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) + + struct SevCommonState { + X86ConfidentialGuest parent_obj; +@@ -96,8 +97,22 @@ struct SevGuestState { + bool legacy_vm_type; + }; + ++struct SevSnpGuestState { ++ SevCommonState parent_obj; ++ ++ /* configuration parameters */ ++ char *guest_visible_workarounds; ++ char *id_block; ++ char *id_auth; ++ char *host_data; ++ ++ struct kvm_sev_snp_launch_start kvm_start_conf; ++ struct kvm_sev_snp_launch_finish kvm_finish_conf; ++}; ++ + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ + #define DEFAULT_SEV_DEVICE "/dev/sev" ++#define DEFAULT_SEV_SNP_POLICY 0x30000 + + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" + typedef struct __attribute__((__packed__)) SevInfoBlock { +@@ -1500,11 
+1515,249 @@ static const TypeInfo sev_guest_info = { + .class_init = sev_guest_class_init, + }; + ++static void ++sev_snp_guest_get_policy(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ visit_type_uint64(v, name, ++ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, ++ errp); ++} ++ ++static void ++sev_snp_guest_set_policy(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ visit_type_uint64(v, name, ++ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, ++ errp); ++} ++ ++static char * ++sev_snp_guest_get_guest_visible_workarounds(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_SNP_GUEST(obj)->guest_visible_workarounds); ++} ++ ++static void ++sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, ++ Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; ++ g_autofree guchar *blob; ++ gsize len; ++ ++ g_free(sev_snp_guest->guest_visible_workarounds); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->guest_visible_workarounds = g_strdup(value); ++ ++ blob = qbase64_decode(sev_snp_guest->guest_visible_workarounds, ++ -1, &len, errp); ++ if (!blob) { ++ return; ++ } ++ ++ if (len != sizeof(start->gosvw)) { ++ error_setg(errp, "parameter length of %lu exceeds max of %lu", ++ len, sizeof(start->gosvw)); ++ return; ++ } ++ ++ memcpy(start->gosvw, blob, len); ++} ++ ++static char * ++sev_snp_guest_get_id_block(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->id_block); ++} ++ ++static void ++sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ gsize len; ++ ++ g_free(sev_snp_guest->id_block); ++ 
g_free((guchar *)finish->id_block_uaddr); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->id_block = g_strdup(value); ++ ++ finish->id_block_uaddr = ++ (uint64_t)qbase64_decode(sev_snp_guest->id_block, -1, &len, errp); ++ ++ if (!finish->id_block_uaddr) { ++ return; ++ } ++ ++ if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) { ++ error_setg(errp, "parameter length of %lu not equal to %u", ++ len, KVM_SEV_SNP_ID_BLOCK_SIZE); ++ return; ++ } ++ ++ finish->id_block_en = (len) ? 1 : 0; ++} ++ ++static char * ++sev_snp_guest_get_id_auth(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->id_auth); ++} ++ ++static void ++sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ gsize len; ++ ++ g_free(sev_snp_guest->id_auth); ++ g_free((guchar *)finish->id_auth_uaddr); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->id_auth = g_strdup(value); ++ ++ finish->id_auth_uaddr = ++ (uint64_t)qbase64_decode(sev_snp_guest->id_auth, -1, &len, errp); ++ ++ if (!finish->id_auth_uaddr) { ++ return; ++ } ++ ++ if (len > KVM_SEV_SNP_ID_AUTH_SIZE) { ++ error_setg(errp, "parameter length:ID_AUTH %lu exceeds max of %u", ++ len, KVM_SEV_SNP_ID_AUTH_SIZE); ++ return; ++ } ++} ++ ++static bool ++sev_snp_guest_get_author_key_enabled(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return !!sev_snp_guest->kvm_finish_conf.auth_key_en; ++} ++ ++static void ++sev_snp_guest_set_author_key_enabled(Object *obj, bool value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ sev_snp_guest->kvm_finish_conf.auth_key_en = value; ++} ++ ++static bool ++sev_snp_guest_get_vcek_disabled(Object *obj, Error **errp) ++{ ++ 
SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return !!sev_snp_guest->kvm_finish_conf.vcek_disabled; ++} ++ ++static void ++sev_snp_guest_set_vcek_disabled(Object *obj, bool value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ sev_snp_guest->kvm_finish_conf.vcek_disabled = value; ++} ++ ++static char * ++sev_snp_guest_get_host_data(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->host_data); ++} ++ ++static void ++sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ g_autofree guchar *blob; ++ gsize len; ++ ++ g_free(sev_snp_guest->host_data); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->host_data = g_strdup(value); ++ ++ blob = qbase64_decode(sev_snp_guest->host_data, -1, &len, errp); ++ ++ if (!blob) { ++ return; ++ } ++ ++ if (len != sizeof(finish->host_data)) { ++ error_setg(errp, "parameter length of %lu not equal to %lu", ++ len, sizeof(finish->host_data)); ++ return; ++ } ++ ++ memcpy(finish->host_data, blob, len); ++} ++ ++static void ++sev_snp_guest_class_init(ObjectClass *oc, void *data) ++{ ++ object_class_property_add(oc, "policy", "uint64", ++ sev_snp_guest_get_policy, ++ sev_snp_guest_set_policy, NULL, NULL); ++ object_class_property_add_str(oc, "guest-visible-workarounds", ++ sev_snp_guest_get_guest_visible_workarounds, ++ sev_snp_guest_set_guest_visible_workarounds); ++ object_class_property_add_str(oc, "id-block", ++ sev_snp_guest_get_id_block, ++ sev_snp_guest_set_id_block); ++ object_class_property_add_str(oc, "id-auth", ++ sev_snp_guest_get_id_auth, ++ sev_snp_guest_set_id_auth); ++ object_class_property_add_bool(oc, "author-key-enabled", ++ sev_snp_guest_get_author_key_enabled, ++ 
sev_snp_guest_set_author_key_enabled); ++ object_class_property_add_bool(oc, "vcek-required", ++ sev_snp_guest_get_vcek_disabled, ++ sev_snp_guest_set_vcek_disabled); ++ object_class_property_add_str(oc, "host-data", ++ sev_snp_guest_get_host_data, ++ sev_snp_guest_set_host_data); ++} ++ ++static void ++sev_snp_guest_instance_init(Object *obj) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ /* default init/start/finish params for kvm */ ++ sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; ++} ++ ++/* guest info specific to sev-snp */ ++static const TypeInfo sev_snp_guest_info = { ++ .parent = TYPE_SEV_COMMON, ++ .name = TYPE_SEV_SNP_GUEST, ++ .instance_size = sizeof(SevSnpGuestState), ++ .class_init = sev_snp_guest_class_init, ++ .instance_init = sev_snp_guest_instance_init, ++}; ++ + static void + sev_register_types(void) + { + type_register_static(&sev_common_info); + type_register_static(&sev_guest_info); ++ type_register_static(&sev_snp_guest_info); + } + + type_init(sev_register_types); +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 668374eef3..bedc667eeb 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -22,6 +22,7 @@ + + #define TYPE_SEV_COMMON "sev-common" + #define TYPE_SEV_GUEST "sev-guest" ++#define TYPE_SEV_SNP_GUEST "sev-snp-guest" + + #define SEV_POLICY_NODBG 0x1 + #define SEV_POLICY_NOKS 0x2 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch new file mode 100644 index 0000000..265da66 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch @@ -0,0 +1,85 @@ +From be37914ae54c8aebc218cf41b37bc0ea1563daae Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 12:51:44 +0200 +Subject: [PATCH 074/100] i386/sev: Invoke launch_updata_data() for SEV class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 
+RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [74/91] f1b588a9ffecd6944a78186d88a6be3849698710 (bonzini/rhel-qemu-kvm) + +Add launch_update_data() in SevCommonStateClass and +invoke as sev_launch_update_data() for SEV object. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-26-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9861405a8f845133b7984322c2df0c43a45553c3) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7b5c4b4874..8834cf9441 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -74,6 +74,7 @@ struct SevCommonStateClass { + /* public */ + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); ++ int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + }; + +@@ -929,7 +930,7 @@ out: + } + + static int +-sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) ++sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, uint64_t len) + { + int ret, fw_error; + struct kvm_sev_launch_update_data update; +@@ -941,7 +942,7 @@ sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + update.uaddr = (uintptr_t)addr; + update.len = len; + trace_kvm_sev_launch_update_data(addr, len); +- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + &update, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", +@@ -1487,6 +1488,7 @@ int + sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevCommonStateClass 
*klass = SEV_COMMON_GET_CLASS(sev_common); + + if (!sev_common) { + return 0; +@@ -1494,7 +1496,9 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + + /* if SEV is in update state then encrypt the data else do nothing */ + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { +- int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); ++ int ret; ++ ++ ret = klass->launch_update_data(sev_common, gpa, ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; +@@ -1968,6 +1972,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; ++ klass->launch_update_data = sev_launch_update_data; + klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch new file mode 100644 index 0000000..f28004d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch @@ -0,0 +1,55 @@ +From 32899eb4fa5143b795b107de4857adce2cf1d434 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:38 -0500 +Subject: [PATCH 075/100] i386/sev: Invoke launch_updata_data() for SNP class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [75/91] 3520af5847f8dddb6d7fe7ad5feb308230f387b9 (bonzini/rhel-qemu-kvm) + +Invoke as sev_snp_launch_update_data() for SNP object. 
+ +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-27-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0765d136eba400ad1cb7cae18438bb10eace64dc) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 8834cf9441..eaf5fc6c6b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1091,6 +1091,15 @@ snp_launch_update_data(uint64_t gpa, void *hva, + return 0; + } + ++static int ++sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, ++ uint8_t *ptr, uint64_t len) ++{ ++ int ret = snp_launch_update_data(gpa, ptr, len, ++ KVM_SEV_SNP_PAGE_TYPE_NORMAL); ++ return ret; ++} ++ + static int + sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, + const KvmCpuidInfo *kvm_cpuid_info) +@@ -2216,6 +2225,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; ++ klass->launch_update_data = sev_snp_launch_update_data; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch b/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch new file mode 100644 index 0000000..e38615b --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch @@ -0,0 +1,47 @@ +From fa6076291eb45255bc2fe523399d7d0647fc5570 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:10 -0500 +Subject: [PATCH 085/100] i386/sev: Move SEV_COMMON null check before + dereferencing + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [85/91] e8d2bfd077766a5e7777b9337d0e77146f883224 (bonzini/rhel-qemu-kvm) + +Fixes Coverity CID 1546886. 
+ +Fixes: 9861405a8f ("i386/sev: Invoke launch_updata_data() for SEV class") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-3-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 48779faef3c8e2fe70bd8285bffa731bd76dc844) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7c9df621de..f18432f58e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1529,11 +1529,12 @@ int + sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); ++ SevCommonStateClass *klass; + + if (!sev_common) { + return 0; + } ++ klass = SEV_COMMON_GET_CLASS(sev_common); + + /* if SEV is in update state then encrypt the data else do nothing */ + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch b/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch new file mode 100644 index 0000000..250a723 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch @@ -0,0 +1,88 @@ +From 4d96ca893126d4c17c9fe03c76973b1d4a414f21 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:18 -0500 +Subject: [PATCH 058/100] i386/sev: Move sev_launch_finish to separate class + method + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [58/91] 7865710d320a6df7038ef7016d350aa9cdcea326 (bonzini/rhel-qemu-kvm) + +When sev-snp-guest objects are introduced there will be a number of +differences in how the launch finish is handled compared to the existing +sev-guest object. 
Move sev_launch_finish() to a class method to make it +easier to implement SNP-specific launch update functionality later. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-7-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit bce615a14aec07cab0488e5a242f6a91e641efcb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b2aa0d6f99..28a018ed83 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -71,6 +71,7 @@ struct SevCommonStateClass { + + /* public */ + int (*launch_start)(SevCommonState *sev_common); ++ void (*launch_finish)(SevCommonState *sev_common); + }; + + /** +@@ -801,12 +802,12 @@ static Notifier sev_machine_done_notify = { + }; + + static void +-sev_launch_finish(SevGuestState *sev_guest) ++sev_launch_finish(SevCommonState *sev_common) + { + int ret, error; + + trace_kvm_sev_launch_finish(); +- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + &error); + if (ret) { + error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", +@@ -814,7 +815,7 @@ sev_launch_finish(SevGuestState *sev_guest) + exit(1); + } + +- sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); ++ sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, +@@ -826,10 +827,11 @@ static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { + SevCommonState *sev_common = opaque; ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(opaque); + + if (running) { + if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { +- sev_launch_finish(SEV_GUEST(sev_common)); ++ klass->launch_finish(sev_common); + } + } + } +@@ -1457,6 +1459,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + + 
klass->launch_start = sev_launch_start; ++ klass->launch_finish = sev_launch_finish; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch b/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch new file mode 100644 index 0000000..12824ec --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch @@ -0,0 +1,91 @@ +From a170ba2c7dbf2775eb9047779d3643a2a81bb372 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:17 -0500 +Subject: [PATCH 057/100] i386/sev: Move sev_launch_update to separate class + method + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [57/91] 4f31e7afaec6f2c2a7c06cda4d7d27d4037e53e0 (bonzini/rhel-qemu-kvm) + +When sev-snp-guest objects are introduced there will be a number of +differences in how the launch data is handled compared to the existing +sev-guest object. Move sev_launch_start() to a class method to make it +easier to implement SNP-specific launch update functionality later. 
+ +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-6-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6600f1ac0c81cbe67faf048ea07f78542dea925f) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 33e606eea0..b2aa0d6f99 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -69,6 +69,8 @@ struct SevCommonState { + struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + ++ /* public */ ++ int (*launch_start)(SevCommonState *sev_common); + }; + + /** +@@ -632,16 +634,16 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + } + + static int +-sev_launch_start(SevGuestState *sev_guest) ++sev_launch_start(SevCommonState *sev_common) + { + gsize sz; + int ret = 1; + int fw_error, rc; ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + struct kvm_sev_launch_start start = { + .handle = sev_guest->handle, .policy = sev_guest->policy + }; + guchar *session = NULL, *dh_cert = NULL; +- SevCommonState *sev_common = SEV_COMMON(sev_guest); + + if (sev_guest->session_file) { + if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { +@@ -862,6 +864,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + + ret = ram_block_discard_disable(true); + if (ret) { +@@ -952,7 +955,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- sev_launch_start(SEV_GUEST(sev_common)); ++ ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; +@@ -1451,6 +1454,10 @@ static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) + static void + 
sev_guest_class_init(ObjectClass *oc, void *data) + { ++ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ ++ klass->launch_start = sev_launch_start; ++ + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch b/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch new file mode 100644 index 0000000..746317d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch @@ -0,0 +1,134 @@ +From d009fa2cebebd1da80f4f2f5d0c4fffb87e02afc Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:34 -0500 +Subject: [PATCH 079/100] i386/sev: Reorder struct declarations + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [79/91] 1274d4620e88dda99ec10173ca5e3cd4184c8fb6 (bonzini/rhel-qemu-kvm) + +Move the declaration of PaddedSevHashTable before SevSnpGuest so +we can add a new such field to the latter. + +No functional change intended. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-23-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cc483bf911931f405dea682c74a3d8b9b6c54369) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 84 +++++++++++++++++++++++------------------------ + 1 file changed, 42 insertions(+), 42 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 73f9406715..3fce4c08eb 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -46,6 +46,48 @@ OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) + OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) + ++/* hard code sha256 digest size */ ++#define HASH_SIZE 32 ++ ++typedef struct QEMU_PACKED SevHashTableEntry { ++ QemuUUID guid; ++ uint16_t len; ++ uint8_t hash[HASH_SIZE]; ++} SevHashTableEntry; ++ ++typedef struct QEMU_PACKED SevHashTable { ++ QemuUUID guid; ++ uint16_t len; ++ SevHashTableEntry cmdline; ++ SevHashTableEntry initrd; ++ SevHashTableEntry kernel; ++} SevHashTable; ++ ++/* ++ * Data encrypted by sev_encrypt_flash() must be padded to a multiple of ++ * 16 bytes. 
++ */ ++typedef struct QEMU_PACKED PaddedSevHashTable { ++ SevHashTable ht; ++ uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; ++} PaddedSevHashTable; ++ ++QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); ++ ++#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" ++typedef struct __attribute__((__packed__)) SevInfoBlock { ++ /* SEV-ES Reset Vector Address */ ++ uint32_t reset_addr; ++} SevInfoBlock; ++ ++#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" ++typedef struct QEMU_PACKED SevHashTableDescriptor { ++ /* SEV hash table area guest address */ ++ uint32_t base; ++ /* SEV hash table area size (in bytes) */ ++ uint32_t size; ++} SevHashTableDescriptor; ++ + struct SevCommonState { + X86ConfidentialGuest parent_obj; + +@@ -128,48 +170,6 @@ typedef struct SevLaunchUpdateData { + + static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; + +-#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +-typedef struct __attribute__((__packed__)) SevInfoBlock { +- /* SEV-ES Reset Vector Address */ +- uint32_t reset_addr; +-} SevInfoBlock; +- +-#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" +-typedef struct QEMU_PACKED SevHashTableDescriptor { +- /* SEV hash table area guest address */ +- uint32_t base; +- /* SEV hash table area size (in bytes) */ +- uint32_t size; +-} SevHashTableDescriptor; +- +-/* hard code sha256 digest size */ +-#define HASH_SIZE 32 +- +-typedef struct QEMU_PACKED SevHashTableEntry { +- QemuUUID guid; +- uint16_t len; +- uint8_t hash[HASH_SIZE]; +-} SevHashTableEntry; +- +-typedef struct QEMU_PACKED SevHashTable { +- QemuUUID guid; +- uint16_t len; +- SevHashTableEntry cmdline; +- SevHashTableEntry initrd; +- SevHashTableEntry kernel; +-} SevHashTable; +- +-/* +- * Data encrypted by sev_encrypt_flash() must be padded to a multiple of +- * 16 bytes. 
+- */ +-typedef struct QEMU_PACKED PaddedSevHashTable { +- SevHashTable ht; +- uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +-} PaddedSevHashTable; +- +-QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); +- + static Error *sev_mig_blocker; + + static const char *const sev_fw_errlist[] = { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch b/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch new file mode 100644 index 0000000..ba66cde --- /dev/null +++ b/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch @@ -0,0 +1,46 @@ +From 80c1d78e31b2567d1c610c8939b75d159ff6ea27 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:13 -0500 +Subject: [PATCH 055/100] i386/sev: Replace error_report with error_setg + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [55/91] 1e15fc2458687e564af9fa5022c29e79ddc8edfd (bonzini/rhel-qemu-kvm) + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-2-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 18c453409a3a84cf7b2c764c5a03fb429a73bbeb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index d30b68c11e..67ed32e5ea 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -952,13 +952,13 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { +- error_report("%s: SEV-ES guests require in-kernel irqchip support", +- __func__); ++ error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" ++ "support", __func__); + goto err; + } + + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { +- error_report("%s: guest policy requires SEV-ES, but 
" ++ error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); + goto err; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch b/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch new file mode 100644 index 0000000..6fc68aa --- /dev/null +++ b/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch @@ -0,0 +1,40 @@ +From 88da6d01b1de2b92adb5c47c6d482876a054705f Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:11 -0500 +Subject: [PATCH 086/100] i386/sev: Return when sev_common is null + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [86/91] 02ce4a6a51ce9fd961f417c13db0a760673591ba (bonzini/rhel-qemu-kvm) + +Fixes Coverity CID 1546885. + +Fixes: 16dcf200dc ("i386/sev: Introduce "sev-common" type to encapsulate common SEV state") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-4-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cd7093a7a168a823d07671348996f049d45e8f67) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index f18432f58e..c40562dce3 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -587,6 +587,7 @@ static SevCapability *sev_get_capabilities(Error **errp) + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (!sev_common) { + error_setg(errp, "SEV is not configured"); ++ return NULL; + } + + sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch b/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch new file mode 100644 index 0000000..8548e22 --- /dev/null +++ 
b/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch @@ -0,0 +1,47 @@ +From c7649ac1b958dc48de50f32b1ad80d84b17945a8 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:29 -0500 +Subject: [PATCH 069/100] i386/sev: Set CPU state to protected once SNP guest + payload is finalized + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [69/91] 09280f987a186511ec7d62c3f340b2148e8556d7 (bonzini/rhel-qemu-kvm) + +Once KVM_SNP_LAUNCH_FINISH is called the vCPU state is copied into the +vCPU's VMSA page and measured/encrypted. Any attempt to read/write CPU +state afterward will only be acting on the initial data and so are +effectively no-ops. + +Set the vCPU state to protected at this point so that QEMU don't +continue trying to re-sync vCPU data during guest runtime. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-18-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d44fdff60ea66fbd7a33f5d32b50843cd80f48a) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index ef2e592ca7..e84e4395a5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -997,6 +997,7 @@ sev_snp_launch_finish(SevCommonState *sev_common) + exit(1); + } + ++ kvm_mark_guest_state_protected(); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch b/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch new file mode 100644 index 0000000..05ccb0a --- /dev/null +++ b/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch @@ -0,0 +1,268 @@ +From 5540bb5ca052531563df1ade68995e268ae65224 Mon Sep 17 00:00:00 2001 
+From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:36 -0500 +Subject: [PATCH 012/100] i386/sev: Switch to use confidential_guest_kvm_init() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [12/91] 6f5f8d1b818826f7ee4b6ae527963ef23c97f531 (bonzini/rhel-qemu-kvm) + +Use confidential_guest_kvm_init() instead of calling SEV +specific sev_kvm_init(). This allows the introduction of multiple +confidential-guest-support subclasses for different x86 vendors. + +As a bonus, stubs are not needed anymore since there is no +direct call from target/i386/kvm/kvm.c to SEV code. + +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 637c95b37b106c2eeba313e0abb38ec12e918a59) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 10 +-- + target/i386/kvm/meson.build | 2 - + target/i386/kvm/sev-stub.c | 21 ------ + target/i386/sev.c | 127 ++++++++++++++++++------------------ + target/i386/sev.h | 2 - + 5 files changed, 69 insertions(+), 93 deletions(-) + delete mode 100644 target/i386/kvm/sev-stub.c + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5f30b649a0..e271652620 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -2543,10 +2543,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + * mechanisms are supported in future (e.g. TDX), they'll need + * their own initialization either here or elsewhere. 
+ */ +- ret = sev_kvm_init(ms->cgs, &local_err); +- if (ret < 0) { +- error_report_err(local_err); +- return ret; ++ if (ms->cgs) { ++ ret = confidential_guest_kvm_init(ms->cgs, &local_err); ++ if (ret < 0) { ++ error_report_err(local_err); ++ return ret; ++ } + } + + has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); +diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build +index 84d9143e60..e7850981e6 100644 +--- a/target/i386/kvm/meson.build ++++ b/target/i386/kvm/meson.build +@@ -7,8 +7,6 @@ i386_kvm_ss.add(files( + + i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) + +-i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) +- + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) + + i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) +diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c +deleted file mode 100644 +index 1be5341e8a..0000000000 +--- a/target/i386/kvm/sev-stub.c ++++ /dev/null +@@ -1,21 +0,0 @@ +-/* +- * QEMU SEV stub +- * +- * Copyright Advanced Micro Devices 2018 +- * +- * Authors: +- * Brijesh Singh +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- * +- */ +- +-#include "qemu/osdep.h" +-#include "sev.h" +- +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +-{ +- /* If we get here, cgs must be some non-SEV thing */ +- return 0; +-} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 72930ff0dc..b8f79d34d1 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) + sev->kernel_hashes = value; + } + +-static void +-sev_guest_class_init(ObjectClass *oc, void *data) +-{ +- object_class_property_add_str(oc, "sev-device", +- sev_guest_get_sev_device, +- sev_guest_set_sev_device); +- object_class_property_set_description(oc, "sev-device", +- "SEV device to use"); +- object_class_property_add_str(oc, "dh-cert-file", +- sev_guest_get_dh_cert_file, +- sev_guest_set_dh_cert_file); +- object_class_property_set_description(oc, "dh-cert-file", +- "guest owners DH certificate (encoded with base64)"); +- object_class_property_add_str(oc, "session-file", +- sev_guest_get_session_file, +- sev_guest_set_session_file); +- object_class_property_set_description(oc, "session-file", +- "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "kernel-hashes", +- sev_guest_get_kernel_hashes, +- sev_guest_set_kernel_hashes); +- object_class_property_set_description(oc, "kernel-hashes", +- "add kernel hashes to guest firmware for measured Linux boot"); +-} +- +-static void +-sev_guest_instance_init(Object *obj) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); +- sev->policy = DEFAULT_GUEST_POLICY; +- object_property_add_uint32_ptr(obj, "policy", &sev->policy, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "handle", &sev->handle, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "reduced-phys-bits", 
+- &sev->reduced_phys_bits, +- OBJ_PROP_FLAG_READWRITE); +-} +- +-/* sev guest info */ +-static const TypeInfo sev_guest_info = { +- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, +- .name = TYPE_SEV_GUEST, +- .instance_size = sizeof(SevGuestState), +- .instance_finalize = sev_guest_finalize, +- .class_init = sev_guest_class_init, +- .instance_init = sev_guest_instance_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } +- } +-}; +- + bool + sev_enabled(void) + { +@@ -906,20 +849,15 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevGuestState *sev +- = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); ++ SevGuestState *sev = SEV_GUEST(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; + +- if (!sev) { +- return 0; +- } +- + ret = ram_block_discard_disable(true); + if (ret) { + error_report("%s: cannot disable RAM discard", __func__); +@@ -1384,6 +1322,67 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static void ++sev_guest_class_init(ObjectClass *oc, void *data) ++{ ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = sev_kvm_init; ++ ++ object_class_property_add_str(oc, "sev-device", ++ sev_guest_get_sev_device, ++ sev_guest_set_sev_device); ++ object_class_property_set_description(oc, "sev-device", ++ "SEV device to use"); ++ object_class_property_add_str(oc, "dh-cert-file", ++ sev_guest_get_dh_cert_file, ++ sev_guest_set_dh_cert_file); ++ object_class_property_set_description(oc, "dh-cert-file", ++ "guest owners DH certificate (encoded with base64)"); ++ object_class_property_add_str(oc, "session-file", ++ sev_guest_get_session_file, ++ sev_guest_set_session_file); ++ 
object_class_property_set_description(oc, "session-file", ++ "guest owners session parameters (encoded with base64)"); ++ object_class_property_add_bool(oc, "kernel-hashes", ++ sev_guest_get_kernel_hashes, ++ sev_guest_set_kernel_hashes); ++ object_class_property_set_description(oc, "kernel-hashes", ++ "add kernel hashes to guest firmware for measured Linux boot"); ++} ++ ++static void ++sev_guest_instance_init(Object *obj) ++{ ++ SevGuestState *sev = SEV_GUEST(obj); ++ ++ sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); ++ sev->policy = DEFAULT_GUEST_POLICY; ++ object_property_add_uint32_ptr(obj, "policy", &sev->policy, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "handle", &sev->handle, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "reduced-phys-bits", ++ &sev->reduced_phys_bits, ++ OBJ_PROP_FLAG_READWRITE); ++} ++ ++/* sev guest info */ ++static const TypeInfo sev_guest_info = { ++ .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .name = TYPE_SEV_GUEST, ++ .instance_size = sizeof(SevGuestState), ++ .instance_finalize = sev_guest_finalize, ++ .class_init = sev_guest_class_init, ++ .instance_init = sev_guest_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ + static void + sev_register_types(void) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index e7499c95b1..9e10d09539 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); + +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +- + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch b/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch 
deleted file mode 100644 index e9d28d3..0000000 --- a/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:22:55 -0400 -Subject: [PATCH 12/14] i386/sev: Update checks and information related to - reduced-phys-bits - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 8168fed9f84e3128f7628969ae78af49433d5ce7 -Author: Tom Lendacky -Date: Fri Sep 30 10:14:29 2022 -0500 - - i386/sev: Update checks and information related to reduced-phys-bits - - The value of the reduced-phys-bits parameter is propogated to the CPUID - information exposed to the guest. Update the current validation check to - account for the size of the CPUID field (6-bits), ensuring the value is - in the range of 1 to 63. - - Maintain backward compatibility, to an extent, by allowing a value greater - than 1 (so that the previously documented value of 5 still works), but not - allowing anything over 63. - - Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. 
David Alan Gilbert - Message-Id: - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/sev.c | 17 ++++++++++++++--- - 1 file changed, 14 insertions(+), 3 deletions(-) - -diff --git a/target/i386/sev.c b/target/i386/sev.c -index 859e06f6ad..fe2144c038 100644 ---- a/target/i386/sev.c -+++ b/target/i386/sev.c -@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); - host_cbitpos = ebx & 0x3f; - -+ /* -+ * The cbitpos value will be placed in bit positions 5:0 of the EBX -+ * register of CPUID 0x8000001F. No need to verify the range as the -+ * comparison against the host value accomplishes that. -+ */ - if (host_cbitpos != sev->cbitpos) { - error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", - __func__, host_cbitpos, sev->cbitpos); - goto err; - } - -- if (sev->reduced_phys_bits < 1) { -- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1," -- " requested '%d'", __func__, sev->reduced_phys_bits); -+ /* -+ * The reduced-phys-bits value will be placed in bit positions 11:6 of -+ * the EBX register of CPUID 0x8000001F, so verify the supplied value -+ * is in the range of 1 to 63. 
-+ */ -+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { -+ error_setg(errp, "%s: reduced_phys_bits check failed," -+ " it should be in the range of 1 to 63, requested '%d'", -+ __func__, sev->reduced_phys_bits); - goto err; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch b/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch new file mode 100644 index 0000000..27852d5 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch @@ -0,0 +1,240 @@ +From a870e7c31d9605baea4741d82521612b6164c99b Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:26 -0500 +Subject: [PATCH 066/100] i386/sev: Update query-sev QAPI format to handle + SEV-SNP + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [66/91] a19b3e226e857f3995176e7d2ef1ce2e4329a885 (bonzini/rhel-qemu-kvm) + +Most of the current 'query-sev' command is relevant to both legacy +SEV/SEV-ES guests and SEV-SNP guests, with 2 exceptions: + + - 'policy' is a 64-bit field for SEV-SNP, not 32-bit, and + the meaning of the bit positions has changed + - 'handle' is not relevant to SEV-SNP + +To address this, this patch adds a new 'sev-type' field that can be +used as a discriminator to select between SEV and SEV-SNP-specific +fields/formats without breaking compatibility for existing management +tools (so long as management tools that add support for launching +SEV-SNP guest update their handling of query-sev appropriately). + +The corresponding HMP command has also been fixed up similarly. 
+ +Signed-off-by: Michael Roth +Co-developed-by:Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-15-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 59d3740cb4ac0f010ce35877572904f6297284b4) +Signed-off-by: Paolo Bonzini +--- + qapi/misc-target.json | 72 ++++++++++++++++++++++++++++++++++--------- + target/i386/sev.c | 55 +++++++++++++++++++++------------ + target/i386/sev.h | 3 ++ + 3 files changed, 96 insertions(+), 34 deletions(-) + +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 4e0a6492a9..2d7d4d89bd 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -47,6 +47,50 @@ + 'send-update', 'receive-update' ], + 'if': 'TARGET_I386' } + ++## ++# @SevGuestType: ++# ++# An enumeration indicating the type of SEV guest being run. ++# ++# @sev: The guest is a legacy SEV or SEV-ES guest. ++# ++# @sev-snp: The guest is an SEV-SNP guest. ++# ++# Since: 6.2 ++## ++{ 'enum': 'SevGuestType', ++ 'data': [ 'sev', 'sev-snp' ], ++ 'if': 'TARGET_I386' } ++ ++## ++# @SevGuestInfo: ++# ++# Information specific to legacy SEV/SEV-ES guests. ++# ++# @policy: SEV policy value ++# ++# @handle: SEV firmware handle ++# ++# Since: 2.12 ++## ++{ 'struct': 'SevGuestInfo', ++ 'data': { 'policy': 'uint32', ++ 'handle': 'uint32' }, ++ 'if': 'TARGET_I386' } ++ ++## ++# @SevSnpGuestInfo: ++# ++# Information specific to SEV-SNP guests. 
++# ++# @snp-policy: SEV-SNP policy value ++# ++# Since: 9.1 ++## ++{ 'struct': 'SevSnpGuestInfo', ++ 'data': { 'snp-policy': 'uint64' }, ++ 'if': 'TARGET_I386' } ++ + ## + # @SevInfo: + # +@@ -60,25 +104,25 @@ + # + # @build-id: SEV FW build id + # +-# @policy: SEV policy value +-# + # @state: SEV guest state + # +-# @handle: SEV firmware handle ++# @sev-type: Type of SEV guest being run + # + # Since: 2.12 + ## +-{ 'struct': 'SevInfo', +- 'data': { 'enabled': 'bool', +- 'api-major': 'uint8', +- 'api-minor' : 'uint8', +- 'build-id' : 'uint8', +- 'policy' : 'uint32', +- 'state' : 'SevState', +- 'handle' : 'uint32' +- }, +- 'if': 'TARGET_I386' +-} ++{ 'union': 'SevInfo', ++ 'base': { 'enabled': 'bool', ++ 'api-major': 'uint8', ++ 'api-minor' : 'uint8', ++ 'build-id' : 'uint8', ++ 'state' : 'SevState', ++ 'sev-type' : 'SevGuestType' }, ++ 'discriminator': 'sev-type', ++ 'data': { ++ 'sev': 'SevGuestInfo', ++ 'sev-snp': 'SevSnpGuestInfo' }, ++ 'if': 'TARGET_I386' } ++ + + ## + # @query-sev: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 072cc4f853..43d1c48bd9 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -363,25 +363,27 @@ static SevInfo *sev_get_info(void) + { + SevInfo *info; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- SevGuestState *sev_guest = +- (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), +- TYPE_SEV_GUEST); + + info = g_new0(SevInfo, 1); + info->enabled = sev_enabled(); + + if (info->enabled) { +- if (sev_guest) { +- info->handle = sev_guest->handle; +- } + info->api_major = sev_common->api_major; + info->api_minor = sev_common->api_minor; + info->build_id = sev_common->build_id; + info->state = sev_common->state; +- /* we only report the lower 32-bits of policy for SNP, ok for now... 
*/ +- info->policy = +- (uint32_t)object_property_get_uint(OBJECT(sev_common), +- "policy", NULL); ++ ++ if (sev_snp_enabled()) { ++ info->sev_type = SEV_GUEST_TYPE_SEV_SNP; ++ info->u.sev_snp.snp_policy = ++ object_property_get_uint(OBJECT(sev_common), "policy", NULL); ++ } else { ++ info->sev_type = SEV_GUEST_TYPE_SEV; ++ info->u.sev.handle = SEV_GUEST(sev_common)->handle; ++ info->u.sev.policy = ++ (uint32_t)object_property_get_uint(OBJECT(sev_common), ++ "policy", NULL); ++ } + } + + return info; +@@ -404,20 +406,33 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) + { + SevInfo *info = sev_get_info(); + +- if (info && info->enabled) { +- monitor_printf(mon, "handle: %d\n", info->handle); +- monitor_printf(mon, "state: %s\n", SevState_str(info->state)); +- monitor_printf(mon, "build: %d\n", info->build_id); +- monitor_printf(mon, "api version: %d.%d\n", +- info->api_major, info->api_minor); ++ if (!info || !info->enabled) { ++ monitor_printf(mon, "SEV is not enabled\n"); ++ goto out; ++ } ++ ++ monitor_printf(mon, "SEV type: %s\n", SevGuestType_str(info->sev_type)); ++ monitor_printf(mon, "state: %s\n", SevState_str(info->state)); ++ monitor_printf(mon, "build: %d\n", info->build_id); ++ monitor_printf(mon, "api version: %d.%d\n", info->api_major, ++ info->api_minor); ++ ++ if (sev_snp_enabled()) { + monitor_printf(mon, "debug: %s\n", +- info->policy & SEV_POLICY_NODBG ? "off" : "on"); +- monitor_printf(mon, "key-sharing: %s\n", +- info->policy & SEV_POLICY_NOKS ? "off" : "on"); ++ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_DBG ? "on" ++ : "off"); ++ monitor_printf(mon, "SMT allowed: %s\n", ++ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_SMT ? "on" ++ : "off"); + } else { +- monitor_printf(mon, "SEV is not enabled\n"); ++ monitor_printf(mon, "handle: %d\n", info->u.sev.handle); ++ monitor_printf(mon, "debug: %s\n", ++ info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on"); ++ monitor_printf(mon, "key-sharing: %s\n", ++ info->u.sev.policy & SEV_POLICY_NOKS ? 
"off" : "on"); + } + ++out: + qapi_free_SevInfo(info); + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 94295ee74f..5dc4767b1e 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -31,6 +31,9 @@ + #define SEV_POLICY_DOMAIN 0x10 + #define SEV_POLICY_SEV 0x20 + ++#define SEV_SNP_POLICY_SMT 0x10000 ++#define SEV_SNP_POLICY_DBG 0x80000 ++ + typedef struct SevKernelLoaderContext { + char *setup_data; + size_t setup_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch b/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch new file mode 100644 index 0000000..56f9f6f --- /dev/null +++ b/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch @@ -0,0 +1,51 @@ +From 98057e3adafa052b21a4fe5ef22835d30df3e644 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:09 -0500 +Subject: [PATCH 084/100] i386/sev: fix unreachable code coverity issue + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [84/91] dc7bf28f491bf675b22a98ea593fba72d8bc415a (bonzini/rhel-qemu-kvm) + +Set 'finish->id_block_en' early, so that it is properly reset. + +Fixes coverity CID 1546887. 
+ +Fixes: 7b34df4426 ("i386/sev: Introduce 'sev-snp-guest' object") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-2-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c94eb5db8e409c932da9eb187e68d4cdc14acc5b) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 004c667ac1..7c9df621de 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2165,6 +2165,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + ++ finish->id_block_en = 0; + g_free(sev_snp_guest->id_block); + g_free((guchar *)finish->id_block_uaddr); + +@@ -2184,7 +2185,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) + return; + } + +- finish->id_block_en = (len) ? 1 : 0; ++ finish->id_block_en = 1; + } + + static char * +-- +2.39.3 + diff --git a/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch b/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch new file mode 100644 index 0000000..9a17dda --- /dev/null +++ b/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch @@ -0,0 +1,81 @@ +From deae6c3b57c3919946a5ce1613e667a3240cf158 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 15 Apr 2024 12:45:09 +0200 +Subject: [PATCH 001/100] introduce pc_rhel_9_5_compat + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [1/91] cfd402fa5080eddba7c954e81ed79f9a1dd654cf (bonzini/rhel-qemu-kvm) + +Allow undoing backported changes that impact guest ABI. 
+ +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 4 ++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + include/hw/i386/pc.h | 3 +++ + 4 files changed, 11 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 4a154c1a9a..648762d908 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -348,6 +348,10 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_5_compat[] = { ++}; ++const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); ++ + GlobalProperty pc_rhel_9_3_compat[] = { + /* pc_rhel_9_3_compat from pc_compat_8_0 */ + { "virtio-mem", "unplugged-inaccessible", "auto" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 6b260682eb..bef3e8b73e 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1015,6 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); + ++ compat_props_add(m->compat_props, pc_rhel_9_5_compat, ++ pc_rhel_9_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_4, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2b54944c0f..9adcdadce8 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,6 +734,8 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + ++ compat_props_add(m->compat_props, pc_rhel_9_5_compat, ++ pc_rhel_9_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index a984c951ad..87420783ab 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -294,6 +294,9 @@ extern const size_t pc_compat_2_0_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t 
pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_5_compat[]; ++extern const size_t pc_rhel_9_5_compat_len; ++ + extern GlobalProperty pc_rhel_9_3_compat[]; + extern const size_t pc_rhel_9_3_compat_len; + +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch b/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch new file mode 100644 index 0000000..137cb53 --- /dev/null +++ b/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch @@ -0,0 +1,61 @@ +From 16c2e9e339a4c83055fd39e032fa16a0e732ed17 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:49:40 +0200 +Subject: [PATCH 2/4] iotests/244: Don't store data-file with protocol in image + +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake +RH-Commit: [2/4] 92e00dab8be1570b13172353d77d2af44cb4e22b + +We want to disable filename parsing for data files because it's too easy +to abuse in malicious image files. Make the test ready for the change by +passing the data file explicitly in command line options. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/244 | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244 +index 3e61fa25bb..bb9cc6512f 100755 +--- a/tests/qemu-iotests/244 ++++ b/tests/qemu-iotests/244 +@@ -215,9 +215,22 @@ $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" + $QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" + + # blkdebug doesn't support copy offloading, so this tests the error path +-$QEMU_IMG amend -f $IMGFMT -o "data_file=blkdebug::$TEST_IMG.data" "$TEST_IMG" +-$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" +-$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" ++test_img_with_blkdebug="json:{ ++ 'driver': 'qcow2', ++ 'file': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG' ++ }, ++ 'data-file': { ++ 'driver': 'blkdebug', ++ 'image': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG.data' ++ } ++ } ++}" ++$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$test_img_with_blkdebug" ++$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$test_img_with_blkdebug" + + echo + echo "=== Flushing should flush the data file ===" +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch b/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch new file mode 100644 index 0000000..2c1d6ae --- /dev/null +++ b/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch @@ -0,0 +1,64 @@ +From d70daa2eb5b670513ccd36c0baa5b36ec8cef666 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:49:40 +0200 +Subject: [PATCH 3/4] iotests/270: Don't store data-file with json: prefix in + image + +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) 
+RH-Jira: RHEL-35611 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake +RH-Commit: [3/4] 705bcc2819ce8e0f8b9d660a93bc48de26413aec + +We want to disable filename parsing for data files because it's too easy +to abuse in malicious image files. Make the test ready for the change by +passing the data file explicitly in command line options. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/270 | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/270 b/tests/qemu-iotests/270 +index 74352342db..c37b674aa2 100755 +--- a/tests/qemu-iotests/270 ++++ b/tests/qemu-iotests/270 +@@ -60,8 +60,16 @@ _make_test_img -o cluster_size=2M,data_file="$TEST_IMG.orig" \ + # "write" 2G of data without using any space. + # (qemu-img create does not like it, though, because null-co does not + # support image creation.) +-$QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \ +- "$TEST_IMG" ++test_img_with_null_data="json:{ ++ 'driver': '$IMGFMT', ++ 'file': { ++ 'filename': '$TEST_IMG' ++ }, ++ 'data-file': { ++ 'driver': 'null-co', ++ 'size':'4294967296' ++ } ++}" + + # This gives us a range of: + # 2^31 - 512 + 768 - 1 = 2^31 + 255 > 2^31 +@@ -74,7 +82,7 @@ $QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \ + # on L2 boundaries, we need large L2 tables; hence the cluster size of + # 2 MB. (Anything from 256 kB should work, though, because then one L2 + # table covers 8 GB.) 
+-$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$TEST_IMG" | _filter_qemu_io ++$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$test_img_with_null_data" | _filter_qemu_io + + _check_test_img + +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch b/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch deleted file mode 100644 index 1fc5697..0000000 --- a/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:34 +0200 -Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm) - -This tests exercises graph locking, draining, and graph modifications -with AioContext switches a lot. Amongst others, it serves as a -regression test for bdrv_graph_wrlock() deadlocking because it is called -with a locked AioContext and for AioContext handling in the NBD server. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-4-kwolf@redhat.com> -Tested-by: Eric Blake -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/iotests.py | 4 ++ - .../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++-- - .../tests/graph-changes-while-io.out | 4 +- - 3 files changed, 58 insertions(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 3e82c634cf..7073579a7d 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ - assert self._qmp is not None - return self._qmp.cmd(cmd, args) - -+ def get_qmp(self) -> QEMUMonitorProtocol: -+ assert self._qmp is not None -+ return self._qmp -+ - def stop(self, kill_signal=15): - self._p.send_signal(kill_signal) - self._p.wait() -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io -index 7664f33689..750e7d4d38 100755 ---- a/tests/qemu-iotests/tests/graph-changes-while-io -+++ b/tests/qemu-iotests/tests/graph-changes-while-io -@@ -22,19 +22,19 @@ - import os - from threading import Thread - import iotests --from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ -- QemuStorageDaemon -+from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ -+ QMPTestCase, QemuStorageDaemon - - - top = os.path.join(iotests.test_dir, 'top.img') - nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') - - --def do_qemu_img_bench() -> None: -+def do_qemu_img_bench(count: int = 2000000) -> None: - """ - Do some I/O requests on `nbd_sock`. 
- """ -- qemu_img('bench', '-f', 'raw', '-c', '2000000', -+ qemu_img('bench', '-f', 'raw', '-c', str(count), - f'nbd+unix:///node0?socket={nbd_sock}') - - -@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): - - bench_thr.join() - -+ def test_commit_while_io(self) -> None: -+ # Run qemu-img bench in the background -+ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) -+ bench_thr.start() -+ -+ qemu_io('-c', 'write 0 64k', top) -+ qemu_io('-c', 'write 128k 64k', top) -+ -+ result = self.qsd.qmp('blockdev-add', { -+ 'driver': imgfmt, -+ 'node-name': 'overlay', -+ 'backing': None, -+ 'file': { -+ 'driver': 'file', -+ 'filename': top -+ } -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.qsd.qmp('blockdev-snapshot', { -+ 'node': 'node0', -+ 'overlay': 'overlay', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ # While qemu-img bench is running, repeatedly commit overlay to node0 -+ while bench_thr.is_alive(): -+ result = self.qsd.qmp('block-commit', { -+ 'job-id': 'job0', -+ 'device': 'overlay', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.qsd.qmp('block-job-cancel', { -+ 'device': 'job0', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ cancelled = False -+ while not cancelled: -+ for event in self.qsd.get_qmp().get_events(wait=10.0): -+ if event['event'] != 'JOB_STATUS_CHANGE': -+ continue -+ if event['data']['status'] == 'null': -+ cancelled = True -+ -+ bench_thr.join() -+ - if __name__ == '__main__': - # Format must support raw backing files - iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out -index ae1213e6f8..fbc63e62f8 100644 ---- a/tests/qemu-iotests/tests/graph-changes-while-io.out -+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out -@@ -1,5 +1,5 @@ --. -+.. 
- ---------------------------------------------------------------------- --Ran 1 tests -+Ran 2 tests - - OK --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch b/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch deleted file mode 100644 index 4e91505..0000000 --- a/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 9 May 2023 15:41:33 +0200 -Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm) - -This tests that trying to resize an image with QMP block_resize doesn't -hang or otherwise fail when the image is attached to a device running in -an iothread. - -This is a regression test for the recent fix that changed -qmp_block_resize, which is a coroutine based QMP handler, to avoid -calling no_coroutine_fns directly. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230509134133.373408-1-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++ - tests/qemu-iotests/tests/iothreads-resize.out | 11 +++ - 2 files changed, 82 insertions(+) - create mode 100755 tests/qemu-iotests/tests/iothreads-resize - create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out - -diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize -new file mode 100755 -index 0000000000..36e4598c62 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iothreads-resize -@@ -0,0 +1,71 @@ -+#!/usr/bin/env bash -+# group: rw auto quick -+# -+# Test resizing an image that is attached to a separate iothread -+# -+# Copyright (C) 2023 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=kwolf@redhat.com -+ -+seq=`basename $0` -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. 
./common.filter -+ -+# Resizing images is only supported by a few block drivers -+_supported_fmt raw qcow2 qed -+_supported_proto file -+_require_devices virtio-scsi-pci -+ -+size=64M -+_make_test_img $size -+ -+qmp() { -+cat < -Date: Thu, 11 May 2023 13:03:22 +0200 -Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not - deprecated in RHEL - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm) - -This is a downstream-only patch that is necessary because the default -CPU in RHEL is marked as deprecated. This makes test cases fail due to -the warning in the output: - -qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max' - -Fixes: 318178778db60b6475d1484509bee136317156d3 -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/testenv.py | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py -index 9a37ad9152..963514aab3 100644 ---- a/tests/qemu-iotests/testenv.py -+++ b/tests/qemu-iotests/testenv.py -@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, - if self.qemu_prog.endswith(f'qemu-system-{suffix}'): - self.qemu_options += f' -machine {machine}' - -+ if self.qemu_prog.endswith('qemu-system-x86_64'): -+ self.qemu_options += ' -cpu Nehalem' -+ - # QEMU_DEFAULT_MACHINE - self.qemu_default_machine = get_default_machine(self.qemu_prog) - --- -2.39.1 - diff --git a/SOURCES/kvm-iotests-iov-padding-New-test.patch b/SOURCES/kvm-iotests-iov-padding-New-test.patch deleted file mode 100644 index 9ef37a2..0000000 --- a/SOURCES/kvm-iotests-iov-padding-New-test.patch +++ /dev/null @@ -1,186 +0,0 @@ -From add833b5de202d6765dda56c8773985fbe7f40a6 
Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:18 +0200 -Subject: [PATCH 4/9] iotests/iov-padding: New test - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s) - -Test that even vectored IO requests with 1024 vector elements that are -not aligned to the device's request alignment will succeed. - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-5-hreitz@redhat.com> -(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++ - tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++ - 2 files changed, 144 insertions(+) - create mode 100755 tests/qemu-iotests/tests/iov-padding - create mode 100644 tests/qemu-iotests/tests/iov-padding.out - -diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding -new file mode 100755 -index 0000000000..b9604900c7 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iov-padding -@@ -0,0 +1,85 @@ -+#!/usr/bin/env bash -+# group: rw quick -+# -+# Check the interaction of request padding (to fit alignment restrictions) with -+# vectored I/O from the guest -+# -+# Copyright Red Hat -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+seq=$(basename $0) -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt raw -+_supported_proto file -+ -+_make_test_img 1M -+ -+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG" -+ -+# Four combinations: -+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k -+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not -+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned -+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not -+for start_offset in 4096 512; do -+ for last_element_length in 512 4096; do -+ length=$((1023 * 512 + $last_element_length)) -+ -+ echo -+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) ==" -+ -+ # Fill with data for testing -+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io -+ -+ # 1023 512-byte buffers, and then one with length $last_element_length -+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length" -+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ -+ -c "writev $cmd_params" \ -+ --image-opts \ -+ "$IMGSPEC" \ -+ | _filter_qemu_io -+ -+ # Read all patterns -- read the part we just wrote with writev twice, -+ # once "normally", and once with a readv, so we see that that works, too -+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ -+ -c "read -P 1 0 $start_offset" \ -+ -c "read -P 2 $start_offset $length" \ -+ -c "readv $cmd_params" \ -+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \ -+ --image-opts \ -+ "$IMGSPEC" \ -+ | _filter_qemu_io -+ done 
-+done -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out -new file mode 100644 -index 0000000000..e07a91fac7 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iov-padding.out -@@ -0,0 +1,59 @@ -+QA output created by iov-padding -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 -+ -+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 4096/4096 bytes at offset 0 -+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 520192/520192 bytes at offset 528384 -+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 4096/4096 bytes at offset 0 -+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 516608/516608 bytes at offset 531968 -+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 512; length: 524288) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 
524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 523776/523776 bytes at offset 524800 -+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 512; length: 527872) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 520192/520192 bytes at offset 528384 -+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+*** done --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch b/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch new file mode 100644 index 0000000..5640a7e --- /dev/null +++ b/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch @@ -0,0 +1,275 @@ +From 88adaeaecb1d7753aa8ac3da40f617d93eaf8bdc Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 17 May 2024 21:50:15 -0500 +Subject: [PATCH 2/4] iotests: test NBD+TLS+iothread +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 244: qio: Inherit follow_coroutine_ctx across TLS +RH-Jira: RHEL-33440 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/2] 29c3128a158b7c49fa79c141a9adcc12693f7de4 (ebblake/centos-qemu-kvm) + 
+Prevent regressions when using NBD with TLS in the presence of +iothreads, adding coverage the fix to qio channels made in the +previous patch. + +The shell function pick_unused_port() was copied from +nbdkit.git/tests/functions.sh.in, where it had all authors from Red +Hat, agreeing to the resulting relicensing from 2-clause BSD to GPLv2. + +CC: qemu-stable@nongnu.org +CC: "Richard W.M. Jones" +Signed-off-by: Eric Blake +Message-ID: <20240531180639.1392905-6-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +(cherry picked from commit a73c99378022ebb785481e84cfe1e81097546268) +Jira: https://issues.redhat.com/browse/RHEL-33440 +Signed-off-by: Eric Blake +--- + tests/qemu-iotests/tests/nbd-tls-iothread | 168 ++++++++++++++++++ + tests/qemu-iotests/tests/nbd-tls-iothread.out | 54 ++++++ + 2 files changed, 222 insertions(+) + create mode 100755 tests/qemu-iotests/tests/nbd-tls-iothread + create mode 100644 tests/qemu-iotests/tests/nbd-tls-iothread.out + +diff --git a/tests/qemu-iotests/tests/nbd-tls-iothread b/tests/qemu-iotests/tests/nbd-tls-iothread +new file mode 100755 +index 0000000000..a2fb07206e +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-tls-iothread +@@ -0,0 +1,168 @@ ++#!/usr/bin/env bash ++# group: rw quick ++# ++# Test of NBD+TLS+iothread ++# ++# Copyright (C) 2024 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++# creator ++owner=eblake@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_qemu ++ _cleanup_test_img ++ rm -f "$dst_image" ++ tls_x509_cleanup ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++. ./common.qemu ++. ./common.tls ++. ./common.nbd ++ ++_supported_fmt qcow2 # Hardcoded to qcow2 command line and QMP below ++_supported_proto file ++ ++# pick_unused_port ++# ++# Picks and returns an "unused" port, setting the global variable ++# $port. ++# ++# This is inherently racy, but we need it because qemu does not currently ++# permit NBD+TLS over a Unix domain socket ++pick_unused_port () ++{ ++ if ! (ss --version) >/dev/null 2>&1; then ++ _notrun "ss utility required, skipped this test" ++ fi ++ ++ # Start at a random port to make it less likely that two parallel ++ # tests will conflict. ++ port=$(( 50000 + (RANDOM%15000) )) ++ while ss -ltn | grep -sqE ":$port\b"; do ++ ((port++)) ++ if [ $port -eq 65000 ]; then port=50000; fi ++ done ++ echo picked unused port ++} ++ ++tls_x509_init ++ ++size=1G ++DST_IMG="$TEST_DIR/dst.qcow2" ++ ++echo ++echo "== preparing TLS creds and spare port ==" ++ ++pick_unused_port ++tls_x509_create_root_ca "ca1" ++tls_x509_create_server "ca1" "server1" ++tls_x509_create_client "ca1" "client1" ++tls_obj_base=tls-creds-x509,id=tls0,verify-peer=true,dir="${tls_dir}" ++ ++echo ++echo "== preparing image ==" ++ ++_make_test_img $size ++$QEMU_IMG create -f qcow2 "$DST_IMG" $size | _filter_img_create ++ ++echo ++echo === Starting Src QEMU === ++echo ++ ++_launch_qemu -machine q35 \ ++ -object iothread,id=iothread0 \ ++ -object "${tls_obj_base}"/client1,endpoint=client \ ++ -device '{"driver":"pcie-root-port", "id":"root0", "multifunction":true, ++ "bus":"pcie.0"}' \ ++ -device '{"driver":"virtio-scsi-pci", "id":"virtio_scsi_pci0", ++ "bus":"root0", 
"iothread":"iothread0"}' \ ++ -device '{"driver":"scsi-hd", "id":"image1", "drive":"drive_image1", ++ "bus":"virtio_scsi_pci0.0"}' \ ++ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false}, ++ "filename":"'"$TEST_IMG"'", "node-name":"drive_sys1"}' \ ++ -blockdev '{"driver":"qcow2", "node-name":"drive_image1", ++ "file":"drive_sys1"}' ++h1=$QEMU_HANDLE ++_send_qemu_cmd $h1 '{"execute": "qmp_capabilities"}' 'return' ++ ++echo ++echo === Starting Dst VM2 === ++echo ++ ++_launch_qemu -machine q35 \ ++ -object iothread,id=iothread0 \ ++ -object "${tls_obj_base}"/server1,endpoint=server \ ++ -device '{"driver":"pcie-root-port", "id":"root0", "multifunction":true, ++ "bus":"pcie.0"}' \ ++ -device '{"driver":"virtio-scsi-pci", "id":"virtio_scsi_pci0", ++ "bus":"root0", "iothread":"iothread0"}' \ ++ -device '{"driver":"scsi-hd", "id":"image1", "drive":"drive_image1", ++ "bus":"virtio_scsi_pci0.0"}' \ ++ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false}, ++ "filename":"'"$DST_IMG"'", "node-name":"drive_sys1"}' \ ++ -blockdev '{"driver":"qcow2", "node-name":"drive_image1", ++ "file":"drive_sys1"}' \ ++ -incoming defer ++h2=$QEMU_HANDLE ++_send_qemu_cmd $h2 '{"execute": "qmp_capabilities"}' 'return' ++ ++echo ++echo === Dst VM: Enable NBD server for incoming storage migration === ++echo ++ ++_send_qemu_cmd $h2 '{"execute": "nbd-server-start", "arguments": ++ {"addr": {"type": "inet", "data": {"host": "127.0.0.1", "port": "'$port'"}}, ++ "tls-creds": "tls0"}}' '{"return": {}}' | sed "s/\"$port\"/PORT/g" ++_send_qemu_cmd $h2 '{"execute": "block-export-add", "arguments": ++ {"node-name": "drive_image1", "type": "nbd", "writable": true, ++ "id": "drive_image1"}}' '{"return": {}}' ++ ++echo ++echo === Src VM: Mirror to dst NBD for outgoing storage migration === ++echo ++ ++_send_qemu_cmd $h1 '{"execute": "blockdev-add", "arguments": ++ {"node-name": "mirror", "driver": "nbd", ++ "server": {"type": "inet", "host": "127.0.0.1", "port": 
"'$port'"}, ++ "export": "drive_image1", "tls-creds": "tls0", ++ "tls-hostname": "127.0.0.1"}}' '{"return": {}}' | sed "s/\"$port\"/PORT/g" ++_send_qemu_cmd $h1 '{"execute": "blockdev-mirror", "arguments": ++ {"sync": "full", "device": "drive_image1", "target": "mirror", ++ "job-id": "drive_image1_53"}}' '{"return": {}}' ++_timed_wait_for $h1 '"ready"' ++ ++echo ++echo === Cleaning up === ++echo ++ ++_send_qemu_cmd $h1 '{"execute":"quit"}' '' ++_send_qemu_cmd $h2 '{"execute":"quit"}' '' ++ ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/tests/nbd-tls-iothread.out b/tests/qemu-iotests/tests/nbd-tls-iothread.out +new file mode 100644 +index 0000000000..1d83d4f903 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-tls-iothread.out +@@ -0,0 +1,54 @@ ++QA output created by nbd-tls-iothread ++ ++== preparing TLS creds and spare port == ++picked unused port ++Generating a self signed certificate... ++Generating a signed certificate... ++Generating a signed certificate... 
++ ++== preparing image == ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 ++Formatting 'TEST_DIR/dst.IMGFMT', fmt=IMGFMT size=1073741824 ++ ++=== Starting Src QEMU === ++ ++{"execute": "qmp_capabilities"} ++{"return": {}} ++ ++=== Starting Dst VM2 === ++ ++{"execute": "qmp_capabilities"} ++{"return": {}} ++ ++=== Dst VM: Enable NBD server for incoming storage migration === ++ ++{"execute": "nbd-server-start", "arguments": ++ {"addr": {"type": "inet", "data": {"host": "127.0.0.1", "port": PORT}}, ++ "tls-creds": "tls0"}} ++{"return": {}} ++{"execute": "block-export-add", "arguments": ++ {"node-name": "drive_image1", "type": "nbd", "writable": true, ++ "id": "drive_image1"}} ++{"return": {}} ++ ++=== Src VM: Mirror to dst NBD for outgoing storage migration === ++ ++{"execute": "blockdev-add", "arguments": ++ {"node-name": "mirror", "driver": "nbd", ++ "server": {"type": "inet", "host": "127.0.0.1", "port": PORT}, ++ "export": "drive_image1", "tls-creds": "tls0", ++ "tls-hostname": "127.0.0.1"}} ++{"return": {}} ++{"execute": "blockdev-mirror", "arguments": ++ {"sync": "full", "device": "drive_image1", "target": "mirror", ++ "job-id": "drive_image1_53"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "drive_image1_53"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "drive_image1_53"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "drive_image1_53"}} ++ ++=== Cleaning up === ++ ++{"execute":"quit"} ++{"execute":"quit"} ++*** done +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch b/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch new file mode 100644 index 0000000..10e98a7 --- /dev/null +++ 
b/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch @@ -0,0 +1,153 @@ +From 120157257ac239050779fdddc9abb56bd39958b3 Mon Sep 17 00:00:00 2001 +From: Chao Peng +Date: Wed, 20 Mar 2024 03:39:05 -0500 +Subject: [PATCH 029/100] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [29/91] 9a08c8699f632cd046a6307e33bd053a7cc7db46 (bonzini/rhel-qemu-kvm) + +Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM. + +With KVM_SET_USER_MEMORY_REGION2, QEMU can set up memory region that +backend'ed both by hva-based shared memory and guest memfd based private +memory. + +Signed-off-by: Chao Peng +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li +Message-ID: <20240320083945.991426-10-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ce5a983233b4ca94ced88c9581014346509b5c71) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 46 +++++++++++++++++++++++++++++++++------- + accel/kvm/trace-events | 2 +- + include/sysemu/kvm_int.h | 2 ++ + 3 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index a7b9a127dd..5ef55e4dd7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -284,35 +284,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, + static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new) + { + KVMState *s = kvm_state; +- struct kvm_userspace_memory_region mem; ++ struct kvm_userspace_memory_region2 mem; + int ret; + + mem.slot = slot->slot | (kml->as_id << 16); + mem.guest_phys_addr = slot->start_addr; + mem.userspace_addr = (unsigned long)slot->ram; + mem.flags = slot->flags; ++ mem.guest_memfd = slot->guest_memfd; ++ mem.guest_memfd_offset = slot->guest_memfd_offset; + + if (slot->memory_size && !new && (mem.flags ^ 
slot->old_flags) & KVM_MEM_READONLY) { + /* Set the slot size to 0 before setting the slot to the desired + * value. This is needed based on KVM commit 75d61fbc. */ + mem.memory_size = 0; +- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ ++ if (kvm_guest_memfd_supported) { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); ++ } else { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ } + if (ret < 0) { + goto err; + } + } + mem.memory_size = slot->memory_size; +- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ if (kvm_guest_memfd_supported) { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); ++ } else { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ } + slot->old_flags = mem.flags; + err: + trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, + mem.guest_phys_addr, mem.memory_size, +- mem.userspace_addr, ret); ++ mem.userspace_addr, mem.guest_memfd, ++ mem.guest_memfd_offset, ret); + if (ret < 0) { +- error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," +- " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", +- __func__, mem.slot, slot->start_addr, +- (uint64_t)mem.memory_size, strerror(errno)); ++ if (kvm_guest_memfd_supported) { ++ error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d," ++ " start=0x%" PRIx64 ", size=0x%" PRIx64 "," ++ " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 "," ++ " guest_memfd_offset=0x%" PRIx64 ": %s", ++ __func__, mem.slot, slot->start_addr, ++ (uint64_t)mem.memory_size, mem.flags, ++ mem.guest_memfd, (uint64_t)mem.guest_memfd_offset, ++ strerror(errno)); ++ } else { ++ error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," ++ " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", ++ __func__, mem.slot, slot->start_addr, ++ (uint64_t)mem.memory_size, strerror(errno)); ++ } + } + return ret; + } +@@ -467,6 +490,10 @@ static int kvm_mem_flags(MemoryRegion *mr) + if (readonly && kvm_readonly_mem_allowed) { + flags |= 
KVM_MEM_READONLY; + } ++ if (memory_region_has_guest_memfd(mr)) { ++ assert(kvm_guest_memfd_supported); ++ flags |= KVM_MEM_GUEST_MEMFD; ++ } + return flags; + } + +@@ -1394,6 +1421,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram_start_offset = ram_start_offset; + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); ++ mem->guest_memfd = mr->ram_block->guest_memfd; ++ mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host; ++ + kvm_slot_init_dirty_bitmap(mem); + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index 9f599abc17..e8c52cb9e7 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" + kvm_irqchip_release_virq(int virq) "virq %d" + kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" + kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" +-kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" ++kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d" + kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 + kvm_resample_fd_notify(int gsi) "gsi %d" + kvm_dirty_ring_full(int id) "vcpu %d" +diff --git a/include/sysemu/kvm_int.h 
b/include/sysemu/kvm_int.h +index 3496be7997..a5a3fee411 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -30,6 +30,8 @@ typedef struct KVMSlot + int as_id; + /* Cache of the offset in ram address space */ + ram_addr_t ram_start_offset; ++ int guest_memfd; ++ hwaddr guest_memfd_offset; + } KVMSlot; + + typedef struct KVMMemoryUpdate { +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch b/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch new file mode 100644 index 0000000..1f043a9 --- /dev/null +++ b/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch @@ -0,0 +1,103 @@ +From 37e6c98987bb2d4be7ce1fdda4475cd0266271c3 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:06 -0500 +Subject: [PATCH 027/100] kvm: Introduce support for memory_attributes + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [27/91] 1b4428289949478f7390196ae4b098c5e6f36bb0 (bonzini/rhel-qemu-kvm) + +Introduce the helper functions to set the attributes of a range of +memory to private or shared. + +This is necessary to notify KVM the private/shared attribute of each gpa +range. KVM needs the information to decide the GPA needs to be mapped at +hva-based shared memory or guest_memfd based private memory. 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240320083945.991426-11-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0811baed49010a9b651b8029ab6b9828b09a884f) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++ + include/sysemu/kvm.h | 4 ++++ + 2 files changed, 36 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 9bd235c969..272e945f52 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -91,6 +91,7 @@ bool kvm_msi_use_devid; + static bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; ++static uint64_t kvm_supported_memory_attributes; + static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { +@@ -1266,6 +1267,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr) ++{ ++ struct kvm_memory_attributes attrs; ++ int r; ++ ++ assert((attr & kvm_supported_memory_attributes) == attr); ++ attrs.attributes = attr; ++ attrs.address = start; ++ attrs.size = size; ++ attrs.flags = 0; ++ ++ r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs); ++ if (r) { ++ error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") " ++ "with attr 0x%" PRIx64 " error '%s'", ++ start, size, attr, strerror(errno)); ++ } ++ return r; ++} ++ ++int kvm_set_memory_attributes_private(hwaddr start, uint64_t size) ++{ ++ return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); ++} ++ ++int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size) ++{ ++ return kvm_set_memory_attributes(start, size, 0); ++} ++ + /* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) +@@ -2387,6 +2418,7 @@ static int kvm_init(MachineState *ms) + goto err; + } 
+ ++ kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); + s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 54f4d83a37..f114ff6986 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -536,4 +536,8 @@ void kvm_mark_guest_state_protected(void); + * reported for the VM. + */ + bool kvm_hwpoisoned_mem(void); ++ ++int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); ++int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch b/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch new file mode 100644 index 0000000..97b94eb --- /dev/null +++ b/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch @@ -0,0 +1,116 @@ +From 31cc494d69449811f4d995326479372da7c1241e Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:35 +0100 +Subject: [PATCH 003/100] kvm: add support for guest physical bits + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [3/91] abb1ba3a584152d8efabd8255b86afe609f8ffbd (bonzini/rhel-qemu-kvm) + +Query kvm for supported guest physical address bits, in cpuid +function 80000008, eax[23:16]. Usually this is identical to host +physical address bits. With NPT or EPT being used this might be +restricted to 48 (max 4-level paging address space size) even if +the host cpu supports more physical address bits. + +When set pass this to the guest, using cpuid too. Guest firmware +can use this to figure how big the usable guest physical address +space is, so PCI bar mapping are actually reachable. 
+ +Signed-off-by: Gerd Hoffmann +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <20240318155336.156197-2-kraxel@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0d08c423688edcca857f88dab20f1fc56de2b281) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm-cpu.c | 50 ++++++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 8 deletions(-) + +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index b91af5051f..7ef94c681f 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -18,10 +18,32 @@ + #include "kvm_i386.h" + #include "hw/core/accel-cpu.h" + ++static void kvm_set_guest_phys_bits(CPUState *cs) ++{ ++ X86CPU *cpu = X86_CPU(cs); ++ uint32_t eax, guest_phys_bits; ++ ++ eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX); ++ guest_phys_bits = (eax >> 16) & 0xff; ++ if (!guest_phys_bits) { ++ return; ++ } ++ cpu->guest_phys_bits = guest_phys_bits; ++ if (cpu->guest_phys_bits > cpu->phys_bits) { ++ cpu->guest_phys_bits = cpu->phys_bits; ++ } ++ ++ if (cpu->host_phys_bits && cpu->host_phys_bits_limit && ++ cpu->guest_phys_bits > cpu->host_phys_bits_limit) { ++ cpu->guest_phys_bits = cpu->host_phys_bits_limit; ++ } ++} ++ + static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + { + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; ++ bool ret; + + /* + * The realize order is important, since x86_cpu_realize() checks if +@@ -32,13 +54,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + * + * realize order: + * +- * x86_cpu_realize(): +- * -> x86_cpu_expand_features() +- * -> cpu_exec_realizefn(): +- * -> accel_cpu_common_realize() +- * kvm_cpu_realizefn() -> host_cpu_realizefn() +- * -> cpu_common_realizefn() +- * -> check/update ucode_rev, phys_bits, mwait ++ * x86_cpu_realizefn(): ++ * x86_cpu_expand_features() ++ * cpu_exec_realizefn(): ++ * accel_cpu_common_realize() ++ * kvm_cpu_realizefn() ++ * host_cpu_realizefn() ++ * 
kvm_set_guest_phys_bits() ++ * check/update ucode_rev, phys_bits, guest_phys_bits, mwait ++ * cpu_common_realizefn() (via xcc->parent_realize) + */ + if (cpu->max_features) { + if (enable_cpu_pm && kvm_has_waitpkg()) { +@@ -50,7 +74,17 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + MSR_IA32_UCODE_REV); + } + } +- return host_cpu_realizefn(cs, errp); ++ ret = host_cpu_realizefn(cs, errp); ++ if (!ret) { ++ return ret; ++ } ++ ++ if ((env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) && ++ cpu->guest_phys_bits == -1) { ++ kvm_set_guest_phys_bits(cs); ++ } ++ ++ return true; + } + + static bool lmce_supported(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch b/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch new file mode 100644 index 0000000..9baa06f --- /dev/null +++ b/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch @@ -0,0 +1,194 @@ +From 33cc1b469689ee2bb7c4f745189472c74a0a98ab Mon Sep 17 00:00:00 2001 +From: Chao Peng +Date: Wed, 20 Mar 2024 03:39:08 -0500 +Subject: [PATCH 034/100] kvm: handle KVM_EXIT_MEMORY_FAULT + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [34/91] 59c672f6b19a3afcb61878775eb6425c6fdea6d5 (bonzini/rhel-qemu-kvm) + +Upon an KVM_EXIT_MEMORY_FAULT exit, userspace needs to do the memory +conversion on the RAMBlock to turn the memory into desired attribute, +switching between private and shared. + +Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when +KVM_EXIT_MEMORY_FAULT happens. + +Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has +guest_memfd memory backend. + +Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is +added. + +When page is converted from shared to private, the original shared +memory can be discarded via ram_block_discard_range(). 
Note, shared +memory can be discarded only when it's not back'ed by hugetlb because +hugetlb is supposed to be pre-allocated and no need for discarding. + +Signed-off-by: Chao Peng +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li + +Message-ID: <20240320083945.991426-13-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c15e5684071d93174e446be318f49d8d59b15d6d) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 98 +++++++++++++++++++++++++++++++++++++----- + accel/kvm/trace-events | 2 + + include/sysemu/kvm.h | 2 + + 3 files changed, 92 insertions(+), 10 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 3f99efc8cc..09164e346c 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2900,6 +2900,69 @@ static void kvm_eat_signals(CPUState *cpu) + } while (sigismember(&chkset, SIG_IPI)); + } + ++int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) ++{ ++ MemoryRegionSection section; ++ ram_addr_t offset; ++ MemoryRegion *mr; ++ RAMBlock *rb; ++ void *addr; ++ int ret = -1; ++ ++ trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared"); ++ ++ if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) || ++ !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) { ++ return -1; ++ } ++ ++ if (!size) { ++ return -1; ++ } ++ ++ section = memory_region_find(get_system_memory(), start, size); ++ mr = section.mr; ++ if (!mr) { ++ return -1; ++ } ++ ++ if (!memory_region_has_guest_memfd(mr)) { ++ error_report("Converting non guest_memfd backed memory region " ++ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", ++ start, size, to_private ? 
"private" : "shared"); ++ goto out_unref; ++ } ++ ++ if (to_private) { ++ ret = kvm_set_memory_attributes_private(start, size); ++ } else { ++ ret = kvm_set_memory_attributes_shared(start, size); ++ } ++ if (ret) { ++ goto out_unref; ++ } ++ ++ addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; ++ rb = qemu_ram_block_from_host(addr, false, &offset); ++ ++ if (to_private) { ++ if (rb->page_size != qemu_real_host_page_size()) { ++ /* ++ * shared memory is backed by hugetlb, which is supposed to be ++ * pre-allocated and doesn't need to be discarded ++ */ ++ goto out_unref; ++ } ++ ret = ram_block_discard_range(rb, offset, size); ++ } else { ++ ret = ram_block_discard_guest_memfd_range(rb, offset, size); ++ } ++ ++out_unref: ++ memory_region_unref(mr); ++ return ret; ++} ++ + int kvm_cpu_exec(CPUState *cpu) + { + struct kvm_run *run = cpu->kvm_run; +@@ -2967,18 +3030,20 @@ int kvm_cpu_exec(CPUState *cpu) + ret = EXCP_INTERRUPT; + break; + } +- fprintf(stderr, "error: kvm run failed %s\n", +- strerror(-run_ret)); ++ if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) { ++ fprintf(stderr, "error: kvm run failed %s\n", ++ strerror(-run_ret)); + #ifdef TARGET_PPC +- if (run_ret == -EBUSY) { +- fprintf(stderr, +- "This is probably because your SMT is enabled.\n" +- "VCPU can only run on primary threads with all " +- "secondary threads offline.\n"); +- } ++ if (run_ret == -EBUSY) { ++ fprintf(stderr, ++ "This is probably because your SMT is enabled.\n" ++ "VCPU can only run on primary threads with all " ++ "secondary threads offline.\n"); ++ } + #endif +- ret = -1; +- break; ++ ret = -1; ++ break; ++ } + } + + trace_kvm_run_exit(cpu->cpu_index, run->exit_reason); +@@ -3061,6 +3126,19 @@ int kvm_cpu_exec(CPUState *cpu) + break; + } + break; ++ case KVM_EXIT_MEMORY_FAULT: ++ trace_kvm_memory_fault(run->memory_fault.gpa, ++ run->memory_fault.size, ++ run->memory_fault.flags); ++ if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) 
{ ++ error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64, ++ (uint64_t)run->memory_fault.flags); ++ ret = -1; ++ break; ++ } ++ ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size, ++ run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE); ++ break; + default: + ret = kvm_arch_handle_exit(cpu, run); + break; +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index e8c52cb9e7..681ccb667d 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -31,3 +31,5 @@ kvm_cpu_exec(void) "" + kvm_interrupt_exit_request(void) "" + kvm_io_window_exit(void) "" + kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32 ++kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s" ++kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64 +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 9e4ab7ae89..74f23dff9c 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -542,4 +542,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); + int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + ++int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch b/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch new file mode 100644 index 0000000..8f9756b --- /dev/null +++ b/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch @@ -0,0 +1,56 @@ +From f9dc55dd179bb534d589af371c5c2a7886bd461e Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:11 -0500 +Subject: [PATCH 030/100] kvm/memory: Make memory type private by default if it + has guest memfd backend + +RH-Author: Paolo 
Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [30/91] 5e21edf844b5629ee32c4075843b028561b97ae2 (bonzini/rhel-qemu-kvm) + +KVM side leaves the memory to shared by default, which may incur the +overhead of paging conversion on the first visit of each page. Because +the expectation is that page is likely to private for the VMs that +require private memory (has guest memfd). + +Explicitly set the memory to private when memory region has valid +guest memfd backend. + +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-16-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit bd3bcf6962b664ca3bf9c60fdcc4534e8e3d0641) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5ef55e4dd7..3f99efc8cc 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1431,6 +1431,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + strerror(-err)); + abort(); + } ++ ++ if (memory_region_has_guest_memfd(mr)) { ++ err = kvm_set_memory_attributes_private(start_addr, slot_size); ++ if (err) { ++ error_report("%s: failed to set memory attribute private: %s", ++ __func__, strerror(-err)); ++ exit(1); ++ } ++ } ++ + start_addr += slot_size; + ram_start_offset += slot_size; + ram += slot_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch b/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch deleted file mode 100644 index d6a6d73..0000000 --- a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +++ /dev/null @@ -1,160 +0,0 @@ -From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001 -From: Marcelo Tosatti -Date: Thu, 29 Jun 2023 14:48:32 -0300 -Subject: [PATCH 5/6] kvm: reuse 
per-vcpu stats fd to avoid vcpu interruption -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marcelo Tosatti -RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption -RH-Bugzilla: 2218644 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Leonardo Brás -RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644 -Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d - -A regression has been detected in latency testing of KVM guests. -More specifically, it was observed that the cyclictest -numbers inside of an isolated vcpu (running on isolated pcpu) are: - -Where a maximum of 50us is acceptable. - -The implementation of KVM_GET_STATS_FD uses run_on_cpu to query -per vcpu statistics, which interrupts the vcpu (and is unnecessary). - -To fix this, open the per vcpu stats fd on vcpu initialization, -and read from that fd from QEMU's main thread. 
- -Signed-off-by: Marcelo Tosatti -Signed-off-by: Paolo Bonzini ---- - accel/kvm/kvm-all.c | 30 +++++++++++++++--------------- - include/hw/core/cpu.h | 1 + - 2 files changed, 16 insertions(+), 15 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index cf3a88d90e..fa7ca46c66 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) - "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)", - kvm_arch_vcpu_id(cpu)); - } -+ cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ - err: - return ret; - } -@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd - - /* Read stats header */ - kvm_stats_header = &descriptors->kvm_stats_header; -- ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header)); -+ ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0); - if (ret != sizeof(*kvm_stats_header)) { - error_setg(errp, "KVM stats: failed to read stats header: " - "expected %zu actual %zu", -@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd - } - - static void query_stats(StatsResultList **result, StatsTarget target, -- strList *names, int stats_fd, Error **errp) -+ strList *names, int stats_fd, CPUState *cpu, -+ Error **errp) - { - struct kvm_stats_desc *kvm_stats_desc; - struct kvm_stats_header *kvm_stats_header; -@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, - break; - case STATS_TARGET_VCPU: - add_stats_entry(result, STATS_PROVIDER_KVM, -- current_cpu->parent_obj.canonical_path, -+ cpu->parent_obj.canonical_path, - stats_list); - break; - default: -@@ -4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, - add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list); - } - --static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) -+static void 
query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) - { -- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; -- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ int stats_fd = cpu->kvm_vcpu_stats_fd; - Error *local_err = NULL; - - if (stats_fd == -1) { -@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) - return; - } - query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU, -- kvm_stats_args->names, stats_fd, kvm_stats_args->errp); -- close(stats_fd); -+ kvm_stats_args->names, stats_fd, cpu, -+ kvm_stats_args->errp); - } - --static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) -+static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) - { -- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; -- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ int stats_fd = cpu->kvm_vcpu_stats_fd; - Error *local_err = NULL; - - if (stats_fd == -1) { -@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) - } - query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd, - kvm_stats_args->errp); -- close(stats_fd); - } - - static void query_stats_cb(StatsResultList **result, StatsTarget target, -@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, - error_setg_errno(errp, errno, "KVM stats: ioctl failed"); - return; - } -- query_stats(result, target, names, stats_fd, errp); -+ query_stats(result, target, names, stats_fd, NULL, errp); - close(stats_fd); - break; - } -@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, - if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) { - continue; - } -- run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); -+ query_stats_vcpu(cpu, &stats_args); - } - break; - } -@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error 
**errp) - if (first_cpu) { - stats_args.result.schema = result; - stats_args.errp = errp; -- run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); -+ query_stats_schema_vcpu(first_cpu, &stats_args); - } - } -diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 397fd3ac68..ae96be07e7 100644 ---- a/include/hw/core/cpu.h -+++ b/include/hw/core/cpu.h -@@ -399,6 +399,7 @@ struct CPUState { - struct kvm_dirty_gfn *kvm_dirty_gfns; - uint32_t kvm_fetch_index; - uint64_t dirty_pages; -+ int kvm_vcpu_stats_fd; - - /* Use by accel-block: CPU is executing an ioctl() */ - QemuLockCnt in_ioctl_lock; --- -2.39.3 - diff --git a/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch b/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch new file mode 100644 index 0000000..7b578b5 --- /dev/null +++ b/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch @@ -0,0 +1,61 @@ +From aeaa7061139202448d466b7e18682081f9cd2097 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Thu, 29 Feb 2024 01:36:54 -0500 +Subject: [PATCH 035/100] kvm/tdx: Don't complain when converting vMMIO region + to shared + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [35/91] c42870771d7af5badc2e10d42be9b5620d72f95d (bonzini/rhel-qemu-kvm) + +Because vMMIO region needs to be shared region, guest TD may explicitly +convert such region from private to shared. Don't complain such +conversion. 
+ +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-34-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c5d9425ef4da9f43fc0903905ad415456d1ab843) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 09164e346c..6efaff90a7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2927,9 +2927,22 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) + } + + if (!memory_region_has_guest_memfd(mr)) { +- error_report("Converting non guest_memfd backed memory region " +- "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", +- start, size, to_private ? "private" : "shared"); ++ /* ++ * Because vMMIO region must be shared, guest TD may convert vMMIO ++ * region to shared explicitly. Don't complain such case. See ++ * memory_region_type() for checking if the region is MMIO region. ++ */ ++ if (!to_private && ++ !memory_region_is_ram(mr) && ++ !memory_region_is_ram_device(mr) && ++ !memory_region_is_rom(mr) && ++ !memory_region_is_romd(mr)) { ++ ret = 0; ++ } else { ++ error_report("Convert non guest_memfd backed memory region " ++ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", ++ start, size, to_private ? 
"private" : "shared"); ++ } + goto out_unref; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch b/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch new file mode 100644 index 0000000..c0f2bc6 --- /dev/null +++ b/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch @@ -0,0 +1,62 @@ +From 2b2dfff3e383c99d0f759a8c12659d1a0ce50e8e Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Thu, 29 Feb 2024 01:36:55 -0500 +Subject: [PATCH 036/100] kvm/tdx: Ignore memory conversion to shared of + unassigned region + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [36/91] 84515b9dcfc2e07b272bb2477acf6430e9d33f28 (bonzini/rhel-qemu-kvm) + +TDX requires vMMIO region to be shared. For KVM, MMIO region is the region +which kvm memslot isn't assigned to (except in-kernel emulation). +qemu has the memory region for vMMIO at each device level. + +While OVMF issues MapGPA(to-shared) conservatively on 32bit PCI MMIO +region, qemu doesn't find corresponding vMMIO region because it's before +PCI device allocation and memory_region_find() finds the device region, not +PCI bus region. It's safe to ignore MapGPA(to-shared) because when guest +accesses those region they use GPA with shared bit set for vMMIO. Ignore +memory conversion request of non-assigned region to shared and return +success. Otherwise OVMF is confused and panics there. 
+ +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-35-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 565f4768bb9cf840b2f8cca41483bb91aa3196a3) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6efaff90a7..f6268855b4 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2923,6 +2923,18 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) + section = memory_region_find(get_system_memory(), start, size); + mr = section.mr; + if (!mr) { ++ /* ++ * Ignore converting non-assigned region to shared. ++ * ++ * TDX requires vMMIO region to be shared to inject #VE to guest. ++ * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region, ++ * and vIO-APIC 0xFEC00000 4K page. ++ * OVMF assigns 32bit PCI MMIO region to ++ * [top of low memory: typically 2GB=0xC000000, 0xFC00000) ++ */ ++ if (!to_private) { ++ return 0; ++ } + return -1; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch b/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch new file mode 100644 index 0000000..1aba040 --- /dev/null +++ b/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch @@ -0,0 +1,127 @@ +From 287ebf9f0b8a62dc49fd7802472c1ae57f653e44 Mon Sep 17 00:00:00 2001 +From: Prasad Pandit +Date: Thu, 25 Apr 2024 12:34:12 +0530 +Subject: [PATCH 1/5] linux-aio: add IO_CMD_FDSYNC command support + +RH-Author: Prasad Pandit +RH-MergeRequest: 249: linux-aio: add IO_CMD_FDSYNC command support +RH-Jira: RHEL-42411 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Commit: [1/1] 9beff6506d2eca7741b1c11b5acdc19b635c7c75 (pjp/cs-qemu-kvm) + +Libaio defines IO_CMD_FDSYNC command to sync all outstanding +asynchronous I/O operations, by flushing out file data to the +disk storage. 
Enable linux-aio to submit such aio request. + +When using aio=native without fdsync() support, QEMU creates +pthreads, and destroying these pthreads results in TLB flushes. +In a real-time guest environment, TLB flushes cause a latency +spike. This patch helps to avoid such spikes. + +Jira: https://issues.redhat.com/browse/RHEL-42411 +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Prasad Pandit +Message-ID: <20240425070412.37248-1-ppandit@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 24687abf237e3c15816d689a8e4b08d7c3190dcb) +Signed-off-by: Prasad Pandit +--- + block/file-posix.c | 9 +++++++++ + block/linux-aio.c | 21 ++++++++++++++++++++- + include/block/raw-aio.h | 1 + + 3 files changed, 30 insertions(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 35684f7e21..9831b08fb6 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -159,6 +159,7 @@ typedef struct BDRVRawState { + bool has_discard:1; + bool has_write_zeroes:1; + bool use_linux_aio:1; ++ bool has_laio_fdsync:1; + bool use_linux_io_uring:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ + bool has_fallocate; +@@ -718,6 +719,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + ret = -EINVAL; + goto fail; + } ++ if (s->use_linux_aio) { ++ s->has_laio_fdsync = laio_has_fdsync(s->fd); ++ } + #else + if (s->use_linux_aio) { + error_setg(errp, "aio=native was specified, but is not supported " +@@ -2599,6 +2603,11 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) + if (raw_check_linux_io_uring(s)) { + return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); + } ++#endif ++#ifdef CONFIG_LINUX_AIO ++ if (s->has_laio_fdsync && raw_check_linux_aio(s)) { ++ return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0); ++ } + #endif + return raw_thread_pool_submit(handle_aiocb_flush, &acb); + } +diff --git a/block/linux-aio.c b/block/linux-aio.c +index ec05d946f3..e3b5ec9aba 
100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -384,6 +384,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, + case QEMU_AIO_READ: + io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset); + break; ++ case QEMU_AIO_FLUSH: ++ io_prep_fdsync(iocbs, fd); ++ break; + /* Currently Linux kernel does not support other operations */ + default: + fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", +@@ -412,7 +415,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, + AioContext *ctx = qemu_get_current_aio_context(); + struct qemu_laiocb laiocb = { + .co = qemu_coroutine_self(), +- .nbytes = qiov->size, ++ .nbytes = qiov ? qiov->size : 0, + .ctx = aio_get_linux_aio(ctx), + .ret = -EINPROGRESS, + .is_read = (type == QEMU_AIO_READ), +@@ -486,3 +489,19 @@ void laio_cleanup(LinuxAioState *s) + } + g_free(s); + } ++ ++bool laio_has_fdsync(int fd) ++{ ++ struct iocb cb; ++ struct iocb *cbs[] = {&cb, NULL}; ++ ++ io_context_t ctx = 0; ++ io_setup(1, &ctx); ++ ++ /* check if host kernel supports IO_CMD_FDSYNC */ ++ io_prep_fdsync(&cb, fd); ++ int ret = io_submit(ctx, 1, cbs); ++ ++ io_destroy(ctx); ++ return (ret == -EINVAL) ? 
false : true; ++} +diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h +index 20e000b8ef..626706827f 100644 +--- a/include/block/raw-aio.h ++++ b/include/block/raw-aio.h +@@ -60,6 +60,7 @@ void laio_cleanup(LinuxAioState *s); + int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, + int type, uint64_t dev_max_batch); + ++bool laio_has_fdsync(int); + void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); + void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch b/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch new file mode 100644 index 0000000..2fd35fd --- /dev/null +++ b/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch @@ -0,0 +1,189 @@ +From c3e2bc3319882c16fa36eafc7a613073746cfc8b Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:14 -0500 +Subject: [PATCH 052/100] linux-headers: Update to current kvm/next + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [52/91] df77e867072f60110b8387a54ba2db6226b35007 (bonzini/rhel-qemu-kvm) + +This updates kernel headers to commit 6f627b425378 ("KVM: SVM: Add module +parameter to enable SEV-SNP", 2024-05-12). The SNP host patches will +be included in Linux 6.11, to be released next July. + +Also brings in an linux-headers/linux/vhost.h fix from v6.9-rc4. 
+ +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-3-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5f69e42da5b40a2213f4db70ca461f554abca686) +Signed-off-by: Paolo Bonzini +--- + linux-headers/asm-loongarch/kvm.h | 4 +++ + linux-headers/asm-riscv/kvm.h | 1 + + linux-headers/asm-x86/kvm.h | 52 ++++++++++++++++++++++++++++++- + linux-headers/linux/vhost.h | 15 ++++----- + 4 files changed, 64 insertions(+), 8 deletions(-) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 109785922c..f9abef3823 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -17,6 +17,8 @@ + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + ++#define KVM_GUESTDBG_USE_SW_BP 0x00010000 ++ + /* + * for KVM_GET_REGS and KVM_SET_REGS + */ +@@ -72,6 +74,8 @@ struct kvm_fpu { + + #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) + #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) ++/* Debugging: Special instruction for software breakpoint */ ++#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) + + #define LOONGARCH_REG_SHIFT 3 + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index b1c503c295..e878e7cc39 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZFA, + KVM_RISCV_ISA_EXT_ZTSO, + KVM_RISCV_ISA_EXT_ZACAS, ++ KVM_RISCV_ISA_EXT_SSCOFPMF, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 31c95c2dfe..1c8f918234 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -695,6 +695,11 @@ 
enum sev_cmd_id { + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + ++ /* SNP-specific commands */ ++ KVM_SEV_SNP_LAUNCH_START = 100, ++ KVM_SEV_SNP_LAUNCH_UPDATE, ++ KVM_SEV_SNP_LAUNCH_FINISH, ++ + KVM_SEV_NR_MAX, + }; + +@@ -709,7 +714,9 @@ struct kvm_sev_cmd { + struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; +- __u32 pad[9]; ++ __u16 ghcb_version; ++ __u16 pad1; ++ __u32 pad2[8]; + }; + + struct kvm_sev_launch_start { +@@ -820,6 +827,48 @@ struct kvm_sev_receive_update_data { + __u32 pad2; + }; + ++struct kvm_sev_snp_launch_start { ++ __u64 policy; ++ __u8 gosvw[16]; ++ __u16 flags; ++ __u8 pad0[6]; ++ __u64 pad1[4]; ++}; ++ ++/* Kept in sync with firmware values for simplicity. */ ++#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 ++#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 ++#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 ++#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 ++#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 ++ ++struct kvm_sev_snp_launch_update { ++ __u64 gfn_start; ++ __u64 uaddr; ++ __u64 len; ++ __u8 type; ++ __u8 pad0; ++ __u16 flags; ++ __u32 pad1; ++ __u64 pad2[4]; ++}; ++ ++#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 ++#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 ++#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 ++ ++struct kvm_sev_snp_launch_finish { ++ __u64 id_block_uaddr; ++ __u64 id_auth_uaddr; ++ __u8 id_block_en; ++ __u8 auth_key_en; ++ __u8 vcek_disabled; ++ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; ++ __u8 pad0[3]; ++ __u16 flags; ++ __u64 pad1[4]; ++}; ++ + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) + #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +@@ -870,5 +919,6 @@ struct kvm_hyperv_eventfd { + #define KVM_X86_SW_PROTECTED_VM 1 + #define KVM_X86_SEV_VM 2 + #define KVM_X86_SEV_ES_VM 3 ++#define KVM_X86_SNP_VM 4 + + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index bea6973906..b95dd84eef 100644 +--- a/linux-headers/linux/vhost.h ++++ 
b/linux-headers/linux/vhost.h +@@ -179,12 +179,6 @@ + /* Get the config size */ + #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +-/* Get the count of all virtqueues */ +-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) +- +-/* Get the number of virtqueue groups. */ +-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) +- + /* Get the number of address spaces. */ + #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +@@ -228,10 +222,17 @@ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) + ++ ++/* Get the count of all virtqueues */ ++#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) ++ ++/* Get the number of virtqueue groups. */ ++#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) ++ + /* Get the queue size of a specific virtqueue. + * userspace set the vring index in vhost_vring_state.index + * kernel set the queue size in vhost_vring_state.num + */ +-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ ++#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ + struct vhost_vring_state) + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch b/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch new file mode 100644 index 0000000..4c3dd73 --- /dev/null +++ b/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch @@ -0,0 +1,2471 @@ +From 530296e1669c9730f261a269d5b911ea56dfcce7 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 23 Apr 2024 11:46:47 +0200 +Subject: [PATCH 017/100] linux-headers: update to current kvm/next + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [17/91] 5660d4967f10a84802de16c24540e95095eaffd5 (bonzini/rhel-qemu-kvm) + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 
ab0c7fb22b56523f24d6e127cd4d10ecff67bf85) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 8 - + include/standard-headers/asm-x86/bootparam.h | 17 +- + include/standard-headers/asm-x86/kvm_para.h | 3 +- + include/standard-headers/asm-x86/setup_data.h | 83 +++ + include/standard-headers/linux/ethtool.h | 48 ++ + include/standard-headers/linux/fuse.h | 39 +- + .../linux/input-event-codes.h | 1 + + include/standard-headers/linux/virtio_gpu.h | 2 + + include/standard-headers/linux/virtio_pci.h | 10 +- + include/standard-headers/linux/virtio_snd.h | 154 ++++ + linux-headers/asm-arm64/kvm.h | 15 +- + linux-headers/asm-arm64/sve_context.h | 11 + + linux-headers/asm-generic/bitsperlong.h | 4 + + linux-headers/asm-loongarch/kvm.h | 2 - + linux-headers/asm-mips/kvm.h | 2 - + linux-headers/asm-powerpc/kvm.h | 45 +- + linux-headers/asm-riscv/kvm.h | 3 +- + linux-headers/asm-s390/kvm.h | 315 +++++++- + linux-headers/asm-x86/kvm.h | 328 ++++++++- + linux-headers/linux/bits.h | 15 + + linux-headers/linux/kvm.h | 689 +----------------- + linux-headers/linux/psp-sev.h | 59 ++ + linux-headers/linux/vhost.h | 7 + + 23 files changed, 1120 insertions(+), 740 deletions(-) + create mode 100644 include/standard-headers/asm-x86/setup_data.h + create mode 100644 linux-headers/linux/bits.h + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index ffbda48917..84a4801977 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -679,14 +679,6 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state) + return dev; + } + +-struct setup_data { +- uint64_t next; +- uint32_t type; +- uint32_t len; +- uint8_t data[]; +-} __attribute__((packed)); +- +- + /* + * The entry point into the kernel for PVH boot is different from + * the native entry point. 
The PVH entry is defined by the x86/HVM +diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h +index 0b06d2bff1..b582a105c0 100644 +--- a/include/standard-headers/asm-x86/bootparam.h ++++ b/include/standard-headers/asm-x86/bootparam.h +@@ -2,21 +2,7 @@ + #ifndef _ASM_X86_BOOTPARAM_H + #define _ASM_X86_BOOTPARAM_H + +-/* setup_data/setup_indirect types */ +-#define SETUP_NONE 0 +-#define SETUP_E820_EXT 1 +-#define SETUP_DTB 2 +-#define SETUP_PCI 3 +-#define SETUP_EFI 4 +-#define SETUP_APPLE_PROPERTIES 5 +-#define SETUP_JAILHOUSE 6 +-#define SETUP_CC_BLOB 7 +-#define SETUP_IMA 8 +-#define SETUP_RNG_SEED 9 +-#define SETUP_ENUM_MAX SETUP_RNG_SEED +- +-#define SETUP_INDIRECT (1<<31) +-#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) ++#include "standard-headers/asm-x86/setup_data.h" + + /* ram_size flags */ + #define RAMDISK_IMAGE_START_MASK 0x07FF +@@ -38,6 +24,7 @@ + #define XLF_EFI_KEXEC (1<<4) + #define XLF_5LEVEL (1<<5) + #define XLF_5LEVEL_ENABLED (1<<6) ++#define XLF_MEM_ENCRYPTION (1<<7) + + + #endif /* _ASM_X86_BOOTPARAM_H */ +diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h +index f0235e58a1..9a011d20f0 100644 +--- a/include/standard-headers/asm-x86/kvm_para.h ++++ b/include/standard-headers/asm-x86/kvm_para.h +@@ -92,7 +92,7 @@ struct kvm_clock_pairing { + #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + + /* MSR_KVM_ASYNC_PF_INT */ +-#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) ++#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) + + /* MSR_KVM_MIGRATION_CONTROL */ + #define KVM_MIGRATION_READY (1 << 0) +@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { + uint32_t token; + + uint8_t pad[56]; +- uint32_t enabled; + }; + + #define KVM_PV_EOI_BIT 0 +diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h +new file mode 100644 +index 0000000000..09355f54c5 +--- /dev/null ++++ 
b/include/standard-headers/asm-x86/setup_data.h +@@ -0,0 +1,83 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _ASM_X86_SETUP_DATA_H ++#define _ASM_X86_SETUP_DATA_H ++ ++/* setup_data/setup_indirect types */ ++#define SETUP_NONE 0 ++#define SETUP_E820_EXT 1 ++#define SETUP_DTB 2 ++#define SETUP_PCI 3 ++#define SETUP_EFI 4 ++#define SETUP_APPLE_PROPERTIES 5 ++#define SETUP_JAILHOUSE 6 ++#define SETUP_CC_BLOB 7 ++#define SETUP_IMA 8 ++#define SETUP_RNG_SEED 9 ++#define SETUP_ENUM_MAX SETUP_RNG_SEED ++ ++#define SETUP_INDIRECT (1<<31) ++#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) ++ ++#ifndef __ASSEMBLY__ ++ ++#include "standard-headers/linux/types.h" ++ ++/* extensible setup data list node */ ++struct setup_data { ++ uint64_t next; ++ uint32_t type; ++ uint32_t len; ++ uint8_t data[]; ++}; ++ ++/* extensible setup indirect data node */ ++struct setup_indirect { ++ uint32_t type; ++ uint32_t reserved; /* Reserved, must be set to zero. */ ++ uint64_t len; ++ uint64_t addr; ++}; ++ ++/* ++ * The E820 memory region entry of the boot protocol ABI: ++ */ ++struct boot_e820_entry { ++ uint64_t addr; ++ uint64_t size; ++ uint32_t type; ++} QEMU_PACKED; ++ ++/* ++ * The boot loader is passing platform information via this Jailhouse-specific ++ * setup data structure. 
++ */ ++struct jailhouse_setup_data { ++ struct { ++ uint16_t version; ++ uint16_t compatible_version; ++ } QEMU_PACKED hdr; ++ struct { ++ uint16_t pm_timer_address; ++ uint16_t num_cpus; ++ uint64_t pci_mmconfig_base; ++ uint32_t tsc_khz; ++ uint32_t apic_khz; ++ uint8_t standard_ioapic; ++ uint8_t cpu_ids[255]; ++ } QEMU_PACKED v1; ++ struct { ++ uint32_t flags; ++ } QEMU_PACKED v2; ++} QEMU_PACKED; ++ ++/* ++ * IMA buffer setup data information from the previous kernel during kexec ++ */ ++struct ima_setup_data { ++ uint64_t addr; ++ uint64_t size; ++} QEMU_PACKED; ++ ++#endif /* __ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_SETUP_DATA_H */ +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index dfb54eff6f..01503784d2 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define IPV4_FLOW 0x10 /* hash only */ + #define IPV6_FLOW 0x11 /* hash only */ + #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ ++ ++/* Used for GTP-U IPv4 and IPv6. ++ * The format of GTP packets only includes ++ * elements such as TEID and GTP version. ++ * It is primarily intended for data communication of the UE. ++ */ ++#define GTPU_V4_FLOW 0x13 /* hash only */ ++#define GTPU_V6_FLOW 0x14 /* hash only */ ++ ++/* Use for GTP-C IPv4 and v6. ++ * The format of these GTP packets does not include TEID. ++ * Primarily expected to be used for communication ++ * to create sessions for UE data communication, ++ * commonly referred to as CSR (Create Session Request). ++ */ ++#define GTPC_V4_FLOW 0x15 /* hash only */ ++#define GTPC_V6_FLOW 0x16 /* hash only */ ++ ++/* Use for GTP-C IPv4 and v6. ++ * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. ++ * After session creation, it becomes this packet. ++ * This is mainly used for requests to realize UE handover. 
++ */ ++#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ ++#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ ++ ++/* Use for GTP-U and extended headers for the PSC (PDU Session Container). ++ * The format of these GTP packets includes TEID and QFI. ++ * In 5G communication using UPF (User Plane Function), ++ * data communication with this extended header is performed. ++ */ ++#define GTPU_EH_V4_FLOW 0x19 /* hash only */ ++#define GTPU_EH_V6_FLOW 0x1a /* hash only */ ++ ++/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. ++ * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by ++ * UL/DL included in the PSC. ++ * There are differences in the data included based on Downlink/Uplink, ++ * and can be used to distinguish packets. ++ * The functions described so far are useful when you want to ++ * handle communication from the mobile network in UPF, PGW, etc. ++ */ ++#define GTPU_UL_V4_FLOW 0x1b /* hash only */ ++#define GTPU_UL_V6_FLOW 0x1c /* hash only */ ++#define GTPU_DL_V4_FLOW 0x1d /* hash only */ ++#define GTPU_DL_V6_FLOW 0x1e /* hash only */ ++ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ + #define FLOW_EXT 0x80000000 + #define FLOW_MAC_EXT 0x40000000 +@@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define RXH_IP_DST (1 << 5) + #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ + #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ ++#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ + #define RXH_DISCARD (1 << 31) + + #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index fc0dcd10ae..bac9dbc49f 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -211,6 +211,12 @@ + * 7.39 + * - add FUSE_DIRECT_IO_ALLOW_MMAP + * - add FUSE_STATX and related structures ++ * ++ * 7.40 ++ * - add 
max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag ++ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag ++ * - add FUSE_NO_EXPORT_SUPPORT init flag ++ * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + */ + + #ifndef _LINUX_FUSE_H +@@ -242,7 +248,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 39 ++#define FUSE_KERNEL_MINOR_VERSION 40 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -349,6 +355,7 @@ struct fuse_file_lock { + * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode ++ * FOPEN_PASSTHROUGH: passthrough read/write io for this open file + */ + #define FOPEN_DIRECT_IO (1 << 0) + #define FOPEN_KEEP_CACHE (1 << 1) +@@ -357,6 +364,7 @@ struct fuse_file_lock { + #define FOPEN_STREAM (1 << 4) + #define FOPEN_NOFLUSH (1 << 5) + #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) ++#define FOPEN_PASSTHROUGH (1 << 7) + + /** + * INIT request/reply flags +@@ -406,6 +414,9 @@ struct fuse_file_lock { + * symlink and mknod (single group that matches parent) + * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation + * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. 
++ * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support ++ * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit ++ * of the request ID indicates resend requests + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -445,6 +456,9 @@ struct fuse_file_lock { + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) + #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) + #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) ++#define FUSE_PASSTHROUGH (1ULL << 37) ++#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) ++#define FUSE_HAS_RESEND (1ULL << 39) + + /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ + #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP +@@ -631,6 +645,7 @@ enum fuse_notify_code { + FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, ++ FUSE_NOTIFY_RESEND = 7, + FUSE_NOTIFY_CODE_MAX, + }; + +@@ -757,7 +772,7 @@ struct fuse_create_in { + struct fuse_open_out { + uint64_t fh; + uint32_t open_flags; +- uint32_t padding; ++ int32_t backing_id; + }; + + struct fuse_release_in { +@@ -873,7 +888,8 @@ struct fuse_init_out { + uint16_t max_pages; + uint16_t map_alignment; + uint32_t flags2; +- uint32_t unused[7]; ++ uint32_t max_stack_depth; ++ uint32_t unused[6]; + }; + + #define CUSE_INIT_INFO_MAX 4096 +@@ -956,6 +972,14 @@ struct fuse_fallocate_in { + uint32_t padding; + }; + ++/** ++ * FUSE request unique ID flag ++ * ++ * Indicates whether this is a resend request. The receiver should handle this ++ * request accordingly. 
++ */ ++#define FUSE_UNIQUE_RESEND (1ULL << 63) ++ + struct fuse_in_header { + uint32_t len; + uint32_t opcode; +@@ -1045,9 +1069,18 @@ struct fuse_notify_retrieve_in { + uint64_t dummy4; + }; + ++struct fuse_backing_map { ++ int32_t fd; ++ uint32_t flags; ++ uint64_t padding; ++}; ++ + /* Device ioctls: */ + #define FUSE_DEV_IOC_MAGIC 229 + #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) ++#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ ++ struct fuse_backing_map) ++#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) + + struct fuse_lseek_in { + uint64_t fh; +diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h +index f6bab08540..2221b0c383 100644 +--- a/include/standard-headers/linux/input-event-codes.h ++++ b/include/standard-headers/linux/input-event-codes.h +@@ -602,6 +602,7 @@ + + #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ + #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ ++#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ + + #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ + #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ +diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h +index 2da48d3d4c..2db643ed8f 100644 +--- a/include/standard-headers/linux/virtio_gpu.h ++++ b/include/standard-headers/linux/virtio_gpu.h +@@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { + + #define VIRTIO_GPU_CAPSET_VIRGL 1 + #define VIRTIO_GPU_CAPSET_VIRGL2 2 ++/* 3 is reserved for gfxstream */ ++#define VIRTIO_GPU_CAPSET_VENUS 4 + + /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ + struct virtio_gpu_get_capset_info { +diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h +index 3e2bc2c97e..4010216103 100644 +--- a/include/standard-headers/linux/virtio_pci.h ++++ 
b/include/standard-headers/linux/virtio_pci.h +@@ -240,7 +240,7 @@ struct virtio_pci_cfg_cap { + #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 + #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 + +-struct QEMU_PACKED virtio_admin_cmd_hdr { ++struct virtio_admin_cmd_hdr { + uint16_t opcode; + /* + * 1 - SR-IOV +@@ -252,20 +252,20 @@ struct QEMU_PACKED virtio_admin_cmd_hdr { + uint64_t group_member_id; + }; + +-struct QEMU_PACKED virtio_admin_cmd_status { ++struct virtio_admin_cmd_status { + uint16_t status; + uint16_t status_qualifier; + /* Unused, reserved for future extensions. */ + uint8_t reserved2[4]; + }; + +-struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data { ++struct virtio_admin_cmd_legacy_wr_data { + uint8_t offset; /* Starting offset of the register(s) to write. */ + uint8_t reserved[7]; + uint8_t registers[]; + }; + +-struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { ++struct virtio_admin_cmd_legacy_rd_data { + uint8_t offset; /* Starting offset of the register(s) to read. 
*/ + }; + +@@ -275,7 +275,7 @@ struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { + + #define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 + +-struct QEMU_PACKED virtio_admin_cmd_notify_info_data { ++struct virtio_admin_cmd_notify_info_data { + uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ + uint8_t bar; /* BAR of the member or the owner device */ + uint8_t padding[6]; +diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h +index 1af96b9fc6..860f12e0a4 100644 +--- a/include/standard-headers/linux/virtio_snd.h ++++ b/include/standard-headers/linux/virtio_snd.h +@@ -7,6 +7,14 @@ + + #include "standard-headers/linux/virtio_types.h" + ++/******************************************************************************* ++ * FEATURE BITS ++ */ ++enum { ++ /* device supports control elements */ ++ VIRTIO_SND_F_CTLS = 0 ++}; ++ + /******************************************************************************* + * CONFIGURATION SPACE + */ +@@ -17,6 +25,8 @@ struct virtio_snd_config { + uint32_t streams; + /* # of available channel maps */ + uint32_t chmaps; ++ /* # of available control elements */ ++ uint32_t controls; + }; + + enum { +@@ -55,6 +65,15 @@ enum { + /* channel map control request types */ + VIRTIO_SND_R_CHMAP_INFO = 0x0200, + ++ /* control element request types */ ++ VIRTIO_SND_R_CTL_INFO = 0x0300, ++ VIRTIO_SND_R_CTL_ENUM_ITEMS, ++ VIRTIO_SND_R_CTL_READ, ++ VIRTIO_SND_R_CTL_WRITE, ++ VIRTIO_SND_R_CTL_TLV_READ, ++ VIRTIO_SND_R_CTL_TLV_WRITE, ++ VIRTIO_SND_R_CTL_TLV_COMMAND, ++ + /* jack event types */ + VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, + VIRTIO_SND_EVT_JACK_DISCONNECTED, +@@ -63,6 +82,9 @@ enum { + VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, + VIRTIO_SND_EVT_PCM_XRUN, + ++ /* control element event types */ ++ VIRTIO_SND_EVT_CTL_NOTIFY = 0x1200, ++ + /* common status codes */ + VIRTIO_SND_S_OK = 0x8000, + VIRTIO_SND_S_BAD_MSG, +@@ -331,4 +353,136 @@ struct virtio_snd_chmap_info { + 
uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; + }; + ++/******************************************************************************* ++ * CONTROL ELEMENTS MESSAGES ++ */ ++struct virtio_snd_ctl_hdr { ++ /* VIRTIO_SND_R_CTL_XXX */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... virtio_snd_config::controls - 1 */ ++ uint32_t control_id; ++}; ++ ++/* supported roles for control elements */ ++enum { ++ VIRTIO_SND_CTL_ROLE_UNDEFINED = 0, ++ VIRTIO_SND_CTL_ROLE_VOLUME, ++ VIRTIO_SND_CTL_ROLE_MUTE, ++ VIRTIO_SND_CTL_ROLE_GAIN ++}; ++ ++/* supported value types for control elements */ ++enum { ++ VIRTIO_SND_CTL_TYPE_BOOLEAN = 0, ++ VIRTIO_SND_CTL_TYPE_INTEGER, ++ VIRTIO_SND_CTL_TYPE_INTEGER64, ++ VIRTIO_SND_CTL_TYPE_ENUMERATED, ++ VIRTIO_SND_CTL_TYPE_BYTES, ++ VIRTIO_SND_CTL_TYPE_IEC958 ++}; ++ ++/* supported access rights for control elements */ ++enum { ++ VIRTIO_SND_CTL_ACCESS_READ = 0, ++ VIRTIO_SND_CTL_ACCESS_WRITE, ++ VIRTIO_SND_CTL_ACCESS_VOLATILE, ++ VIRTIO_SND_CTL_ACCESS_INACTIVE, ++ VIRTIO_SND_CTL_ACCESS_TLV_READ, ++ VIRTIO_SND_CTL_ACCESS_TLV_WRITE, ++ VIRTIO_SND_CTL_ACCESS_TLV_COMMAND ++}; ++ ++struct virtio_snd_ctl_info { ++ /* common header */ ++ struct virtio_snd_info hdr; ++ /* element role (VIRTIO_SND_CTL_ROLE_XXX) */ ++ uint32_t role; ++ /* element value type (VIRTIO_SND_CTL_TYPE_XXX) */ ++ uint32_t type; ++ /* element access right bit map (1 << VIRTIO_SND_CTL_ACCESS_XXX) */ ++ uint32_t access; ++ /* # of members in the element value */ ++ uint32_t count; ++ /* index for an element with a non-unique name */ ++ uint32_t index; ++ /* name identifier string for the element */ ++ uint8_t name[44]; ++ /* additional information about the element's value */ ++ union { ++ /* VIRTIO_SND_CTL_TYPE_INTEGER */ ++ struct { ++ /* minimum supported value */ ++ uint32_t min; ++ /* maximum supported value */ ++ uint32_t max; ++ /* fixed step size for value (0 = variable size) */ ++ uint32_t step; ++ } integer; ++ /* VIRTIO_SND_CTL_TYPE_INTEGER64 */ ++ struct { ++ /* minimum 
supported value */ ++ uint64_t min; ++ /* maximum supported value */ ++ uint64_t max; ++ /* fixed step size for value (0 = variable size) */ ++ uint64_t step; ++ } integer64; ++ /* VIRTIO_SND_CTL_TYPE_ENUMERATED */ ++ struct { ++ /* # of options supported for value */ ++ uint32_t items; ++ } enumerated; ++ } value; ++}; ++ ++struct virtio_snd_ctl_enum_item { ++ /* option name */ ++ uint8_t item[64]; ++}; ++ ++struct virtio_snd_ctl_iec958 { ++ /* AES/IEC958 channel status bits */ ++ uint8_t status[24]; ++ /* AES/IEC958 subcode bits */ ++ uint8_t subcode[147]; ++ /* nothing */ ++ uint8_t pad; ++ /* AES/IEC958 subframe bits */ ++ uint8_t dig_subframe[4]; ++}; ++ ++struct virtio_snd_ctl_value { ++ union { ++ /* VIRTIO_SND_CTL_TYPE_BOOLEAN|INTEGER value */ ++ uint32_t integer[128]; ++ /* VIRTIO_SND_CTL_TYPE_INTEGER64 value */ ++ uint64_t integer64[64]; ++ /* VIRTIO_SND_CTL_TYPE_ENUMERATED value (option indexes) */ ++ uint32_t enumerated[128]; ++ /* VIRTIO_SND_CTL_TYPE_BYTES value */ ++ uint8_t bytes[512]; ++ /* VIRTIO_SND_CTL_TYPE_IEC958 value */ ++ struct virtio_snd_ctl_iec958 iec958; ++ } value; ++}; ++ ++/* supported event reason types */ ++enum { ++ /* element's value has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_VALUE = 0, ++ /* element's information has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_INFO, ++ /* element's metadata has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_TLV ++}; ++ ++struct virtio_snd_ctl_event { ++ /* VIRTIO_SND_EVT_CTL_NOTIFY */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... 
virtio_snd_config::controls - 1 */ ++ uint16_t control_id; ++ /* event reason bit map (1 << VIRTIO_SND_CTL_EVT_MASK_XXX) */ ++ uint16_t mask; ++}; ++ + #endif /* VIRTIO_SND_IF_H */ +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index c59ea55cd8..2af9931ae9 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -37,9 +37,7 @@ + #include + #include + +-#define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_READONLY_MEM + #define __KVM_HAVE_VCPU_EVENTS + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +@@ -76,11 +74,11 @@ struct kvm_regs { + + /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ + #define KVM_ARM_DEVICE_TYPE_SHIFT 0 +-#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ +- KVM_ARM_DEVICE_TYPE_SHIFT) ++#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ ++ KVM_ARM_DEVICE_TYPE_SHIFT) + #define KVM_ARM_DEVICE_ID_SHIFT 16 +-#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ +- KVM_ARM_DEVICE_ID_SHIFT) ++#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ ++ KVM_ARM_DEVICE_ID_SHIFT) + + /* Supported device IDs */ + #define KVM_ARM_DEVICE_VGIC_V2 0 +@@ -162,6 +160,11 @@ struct kvm_sync_regs { + __u64 device_irq_level; + }; + ++/* Bits for run->s.regs.device_irq_level */ ++#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) ++#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) ++#define KVM_ARM_DEV_PMU (1 << 2) ++ + /* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. +diff --git a/linux-headers/asm-arm64/sve_context.h b/linux-headers/asm-arm64/sve_context.h +index 1d0e3e1d09..d1b1ec8cb1 100644 +--- a/linux-headers/asm-arm64/sve_context.h ++++ b/linux-headers/asm-arm64/sve_context.h +@@ -13,6 +13,17 @@ + + #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ + ++/* ++ * Yes, __SVE_VQ_MAX is 512 QUADWORDS. 
++ * ++ * To help ensure forward portability, this is much larger than the ++ * current maximum value defined by the SVE architecture. While arrays ++ * or static allocations can be sized based on this value, watch out! ++ * It will waste a surprisingly large amount of memory. ++ * ++ * Dynamic sizing based on the actual runtime vector length is likely to ++ * be preferable for most purposes. ++ */ + #define __SVE_VQ_MIN 1 + #define __SVE_VQ_MAX 512 + +diff --git a/linux-headers/asm-generic/bitsperlong.h b/linux-headers/asm-generic/bitsperlong.h +index 75f320fa91..1fb4f0c9f2 100644 +--- a/linux-headers/asm-generic/bitsperlong.h ++++ b/linux-headers/asm-generic/bitsperlong.h +@@ -24,4 +24,8 @@ + #endif + #endif + ++#ifndef __BITS_PER_LONG_LONG ++#define __BITS_PER_LONG_LONG 64 ++#endif ++ + #endif /* __ASM_GENERIC_BITS_PER_LONG */ +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 923d0bd382..109785922c 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -14,8 +14,6 @@ + * Some parts derived from the x86 version of this file. + */ + +-#define __KVM_HAVE_READONLY_MEM +- + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + +diff --git a/linux-headers/asm-mips/kvm.h b/linux-headers/asm-mips/kvm.h +index edcf717c43..9673dc9cb3 100644 +--- a/linux-headers/asm-mips/kvm.h ++++ b/linux-headers/asm-mips/kvm.h +@@ -20,8 +20,6 @@ + * Some parts derived from the x86 version of this file. 
+ */ + +-#define __KVM_HAVE_READONLY_MEM +- + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + + /* +diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h +index 9f18fa090f..1691297a76 100644 +--- a/linux-headers/asm-powerpc/kvm.h ++++ b/linux-headers/asm-powerpc/kvm.h +@@ -28,7 +28,6 @@ + #define __KVM_HAVE_PPC_SMT + #define __KVM_HAVE_IRQCHIP + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_GUEST_DEBUG + + /* Not always available, but if it is, this is the correct offset. */ + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +@@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq { + #define KVM_XIVE_TIMA_PAGE_OFFSET 0 + #define KVM_XIVE_ESB_PAGE_OFFSET 4 + ++/* for KVM_PPC_GET_PVINFO */ ++ ++#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) ++ ++struct kvm_ppc_pvinfo { ++ /* out */ ++ __u32 flags; ++ __u32 hcall[4]; ++ __u8 pad[108]; ++}; ++ ++/* for KVM_PPC_GET_SMMU_INFO */ ++#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 ++ ++struct kvm_ppc_one_page_size { ++ __u32 page_shift; /* Page shift (or 0) */ ++ __u32 pte_enc; /* Encoding in the HPTE (>>12) */ ++}; ++ ++struct kvm_ppc_one_seg_page_size { ++ __u32 page_shift; /* Base page shift of segment (or 0) */ ++ __u32 slb_enc; /* SLB encoding for BookS */ ++ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; ++}; ++ ++#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 ++#define KVM_PPC_1T_SEGMENTS 0x00000002 ++#define KVM_PPC_NO_HASH 0x00000004 ++ ++struct kvm_ppc_smmu_info { ++ __u64 flags; ++ __u32 slb_size; ++ __u16 data_keys; /* # storage keys supported for data */ ++ __u16 instr_keys; /* # storage keys supported for instructions */ ++ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; ++}; ++ ++/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ ++struct kvm_ppc_resize_hpt { ++ __u64 flags; ++ __u32 shift; ++ __u32 pad; ++}; ++ + #endif /* __LINUX_KVM_POWERPC_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 7499e88a94..b1c503c295 100644 +--- a/linux-headers/asm-riscv/kvm.h 
++++ b/linux-headers/asm-riscv/kvm.h +@@ -16,7 +16,6 @@ + #include + + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_READONLY_MEM + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +@@ -166,6 +165,8 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZVFH, + KVM_RISCV_ISA_EXT_ZVFHMIN, + KVM_RISCV_ISA_EXT_ZFA, ++ KVM_RISCV_ISA_EXT_ZTSO, ++ KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index 023a2763a9..684c4e1205 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ b/linux-headers/asm-s390/kvm.h +@@ -12,7 +12,320 @@ + #include + + #define __KVM_S390 +-#define __KVM_HAVE_GUEST_DEBUG ++ ++struct kvm_s390_skeys { ++ __u64 start_gfn; ++ __u64 count; ++ __u64 skeydata_addr; ++ __u32 flags; ++ __u32 reserved[9]; ++}; ++ ++#define KVM_S390_CMMA_PEEK (1 << 0) ++ ++/** ++ * kvm_s390_cmma_log - Used for CMMA migration. ++ * ++ * Used both for input and output. ++ * ++ * @start_gfn: Guest page number to start from. ++ * @count: Size of the result buffer. ++ * @flags: Control operation mode via KVM_S390_CMMA_* flags ++ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty ++ * pages are still remaining. ++ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set ++ * in the PGSTE. ++ * @values: Pointer to the values buffer. ++ * ++ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
++ */ ++struct kvm_s390_cmma_log { ++ __u64 start_gfn; ++ __u32 count; ++ __u32 flags; ++ union { ++ __u64 remaining; ++ __u64 mask; ++ }; ++ __u64 values; ++}; ++ ++#define KVM_S390_RESET_POR 1 ++#define KVM_S390_RESET_CLEAR 2 ++#define KVM_S390_RESET_SUBSYSTEM 4 ++#define KVM_S390_RESET_CPU_INIT 8 ++#define KVM_S390_RESET_IPL 16 ++ ++/* for KVM_S390_MEM_OP */ ++struct kvm_s390_mem_op { ++ /* in */ ++ __u64 gaddr; /* the guest address */ ++ __u64 flags; /* flags */ ++ __u32 size; /* amount of bytes */ ++ __u32 op; /* type of operation */ ++ __u64 buf; /* buffer in userspace */ ++ union { ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ __u8 pad1[6]; /* ignored */ ++ __u64 old_addr; /* ignored if cmpxchg flag unset */ ++ }; ++ __u32 sida_offset; /* offset into the sida */ ++ __u8 reserved[32]; /* ignored */ ++ }; ++}; ++/* types for kvm_s390_mem_op->op */ ++#define KVM_S390_MEMOP_LOGICAL_READ 0 ++#define KVM_S390_MEMOP_LOGICAL_WRITE 1 ++#define KVM_S390_MEMOP_SIDA_READ 2 ++#define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 ++#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 ++ ++/* flags for kvm_s390_mem_op->flags */ ++#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) ++#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) ++ ++/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ ++#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) ++#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) ++ ++struct kvm_s390_psw { ++ __u64 mask; ++ __u64 addr; ++}; ++ ++/* valid values for type in kvm_s390_interrupt */ ++#define KVM_S390_SIGP_STOP 0xfffe0000u ++#define KVM_S390_PROGRAM_INT 0xfffe0001u ++#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u ++#define KVM_S390_RESTART 0xfffe0003u ++#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u ++#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u ++#define 
KVM_S390_MCHK 0xfffe1000u ++#define KVM_S390_INT_CLOCK_COMP 0xffff1004u ++#define KVM_S390_INT_CPU_TIMER 0xffff1005u ++#define KVM_S390_INT_VIRTIO 0xffff2603u ++#define KVM_S390_INT_SERVICE 0xffff2401u ++#define KVM_S390_INT_EMERGENCY 0xffff1201u ++#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u ++/* Anything below 0xfffe0000u is taken by INT_IO */ ++#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ ++ (((schid)) | \ ++ ((ssid) << 16) | \ ++ ((cssid) << 18) | \ ++ ((ai) << 26)) ++#define KVM_S390_INT_IO_MIN 0x00000000u ++#define KVM_S390_INT_IO_MAX 0xfffdffffu ++#define KVM_S390_INT_IO_AI_MASK 0x04000000u ++ ++ ++struct kvm_s390_interrupt { ++ __u32 type; ++ __u32 parm; ++ __u64 parm64; ++}; ++ ++struct kvm_s390_io_info { ++ __u16 subchannel_id; ++ __u16 subchannel_nr; ++ __u32 io_int_parm; ++ __u32 io_int_word; ++}; ++ ++struct kvm_s390_ext_info { ++ __u32 ext_params; ++ __u32 pad; ++ __u64 ext_params2; ++}; ++ ++struct kvm_s390_pgm_info { ++ __u64 trans_exc_code; ++ __u64 mon_code; ++ __u64 per_address; ++ __u32 data_exc_code; ++ __u16 code; ++ __u16 mon_class_nr; ++ __u8 per_code; ++ __u8 per_atmid; ++ __u8 exc_access_id; ++ __u8 per_access_id; ++ __u8 op_access_id; ++#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 ++#define KVM_S390_PGM_FLAGS_ILC_0 0x02 ++#define KVM_S390_PGM_FLAGS_ILC_1 0x04 ++#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 ++#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 ++ __u8 flags; ++ __u8 pad[2]; ++}; ++ ++struct kvm_s390_prefix_info { ++ __u32 address; ++}; ++ ++struct kvm_s390_extcall_info { ++ __u16 code; ++}; ++ ++struct kvm_s390_emerg_info { ++ __u16 code; ++}; ++ ++#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 ++struct kvm_s390_stop_info { ++ __u32 flags; ++}; ++ ++struct kvm_s390_mchk_info { ++ __u64 cr14; ++ __u64 mcic; ++ __u64 failing_storage_address; ++ __u32 ext_damage_code; ++ __u32 pad; ++ __u8 fixed_logout[16]; ++}; ++ ++struct kvm_s390_irq { ++ __u64 type; ++ union { ++ struct kvm_s390_io_info io; ++ struct kvm_s390_ext_info ext; ++ struct 
kvm_s390_pgm_info pgm; ++ struct kvm_s390_emerg_info emerg; ++ struct kvm_s390_extcall_info extcall; ++ struct kvm_s390_prefix_info prefix; ++ struct kvm_s390_stop_info stop; ++ struct kvm_s390_mchk_info mchk; ++ char reserved[64]; ++ } u; ++}; ++ ++struct kvm_s390_irq_state { ++ __u64 buf; ++ __u32 flags; /* will stay unused for compatibility reasons */ ++ __u32 len; ++ __u32 reserved[4]; /* will stay unused for compatibility reasons */ ++}; ++ ++struct kvm_s390_ucas_mapping { ++ __u64 user_addr; ++ __u64 vcpu_addr; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_sec_parm { ++ __u64 origin; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_unp { ++ __u64 addr; ++ __u64 size; ++ __u64 tweak; ++}; ++ ++enum pv_cmd_dmp_id { ++ KVM_PV_DUMP_INIT, ++ KVM_PV_DUMP_CONFIG_STOR_STATE, ++ KVM_PV_DUMP_COMPLETE, ++ KVM_PV_DUMP_CPU, ++}; ++ ++struct kvm_s390_pv_dmp { ++ __u64 subcmd; ++ __u64 buff_addr; ++ __u64 buff_len; ++ __u64 gaddr; /* For dump storage state */ ++ __u64 reserved[4]; ++}; ++ ++enum pv_cmd_info_id { ++ KVM_PV_INFO_VM, ++ KVM_PV_INFO_DUMP, ++}; ++ ++struct kvm_s390_pv_info_dump { ++ __u64 dump_cpu_buffer_len; ++ __u64 dump_config_mem_buffer_per_1m; ++ __u64 dump_config_finalize_len; ++}; ++ ++struct kvm_s390_pv_info_vm { ++ __u64 inst_calls_list[4]; ++ __u64 max_cpus; ++ __u64 max_guests; ++ __u64 max_guest_addr; ++ __u64 feature_indication; ++}; ++ ++struct kvm_s390_pv_info_header { ++ __u32 id; ++ __u32 len_max; ++ __u32 len_written; ++ __u32 reserved; ++}; ++ ++struct kvm_s390_pv_info { ++ struct kvm_s390_pv_info_header header; ++ union { ++ struct kvm_s390_pv_info_dump dump; ++ struct kvm_s390_pv_info_vm vm; ++ }; ++}; ++ ++enum pv_cmd_id { ++ KVM_PV_ENABLE, ++ KVM_PV_DISABLE, ++ KVM_PV_SET_SEC_PARMS, ++ KVM_PV_UNPACK, ++ KVM_PV_VERIFY, ++ KVM_PV_PREP_RESET, ++ KVM_PV_UNSHARE_ALL, ++ KVM_PV_INFO, ++ KVM_PV_DUMP, ++ KVM_PV_ASYNC_CLEANUP_PREPARE, ++ KVM_PV_ASYNC_CLEANUP_PERFORM, ++}; ++ ++struct kvm_pv_cmd { ++ __u32 cmd; /* Command to be executed */ ++ __u16 rc; 
/* Ultravisor return code */ ++ __u16 rrc; /* Ultravisor return reason code */ ++ __u64 data; /* Data or address */ ++ __u32 flags; /* flags for future extensions. Must be 0 for now */ ++ __u32 reserved[3]; ++}; ++ ++struct kvm_s390_zpci_op { ++ /* in */ ++ __u32 fh; /* target device */ ++ __u8 op; /* operation to perform */ ++ __u8 pad[3]; ++ union { ++ /* for KVM_S390_ZPCIOP_REG_AEN */ ++ struct { ++ __u64 ibv; /* Guest addr of interrupt bit vector */ ++ __u64 sb; /* Guest addr of summary bit */ ++ __u32 flags; ++ __u32 noi; /* Number of interrupts */ ++ __u8 isc; /* Guest interrupt subclass */ ++ __u8 sbo; /* Offset of guest summary bit vector */ ++ __u16 pad; ++ } reg_aen; ++ __u64 reserved[8]; ++ } u; ++}; ++ ++/* types for kvm_s390_zpci_op->op */ ++#define KVM_S390_ZPCIOP_REG_AEN 0 ++#define KVM_S390_ZPCIOP_DEREG_AEN 1 ++ ++/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ ++#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) + + /* Device control API: s390-specific devices */ + #define KVM_DEV_FLIC_GET_ALL_IRQS 1 +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 003fb74534..31c95c2dfe 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -7,6 +7,8 @@ + * + */ + ++#include ++#include + #include + #include + #include +@@ -40,7 +42,6 @@ + #define __KVM_HAVE_IRQ_LINE + #define __KVM_HAVE_MSI + #define __KVM_HAVE_USER_NMI +-#define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_MSIX + #define __KVM_HAVE_MCE + #define __KVM_HAVE_PIT_STATE2 +@@ -49,7 +50,6 @@ + #define __KVM_HAVE_DEBUGREGS + #define __KVM_HAVE_XSAVE + #define __KVM_HAVE_XCRS +-#define __KVM_HAVE_READONLY_MEM + + /* Architectural interrupt line count. 
*/ + #define KVM_NR_INTERRUPTS 256 +@@ -455,8 +455,13 @@ struct kvm_sync_regs { + + #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + +-/* attributes for system fd (group 0) */ +-#define KVM_X86_XCOMP_GUEST_SUPP 0 ++/* vendor-independent attributes for system fd (group 0) */ ++#define KVM_X86_GRP_SYSTEM 0 ++# define KVM_X86_XCOMP_GUEST_SUPP 0 ++ ++/* vendor-specific groups and attributes for system fd */ ++#define KVM_X86_GRP_SEV 1 ++# define KVM_X86_SEV_VMSA_FEATURES 0 + + struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +@@ -524,9 +529,310 @@ struct kvm_pmu_event_filter { + #define KVM_PMU_EVENT_ALLOW 0 + #define KVM_PMU_EVENT_DENY 1 + +-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) ++#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) + #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + ++/* for KVM_CAP_MCE */ ++struct kvm_x86_mce { ++ __u64 status; ++ __u64 addr; ++ __u64 misc; ++ __u64 mcg_status; ++ __u8 bank; ++ __u8 pad1[7]; ++ __u64 pad2[3]; ++}; ++ ++/* for KVM_CAP_XEN_HVM */ ++#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) ++#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) ++#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) ++ ++struct kvm_xen_hvm_config { ++ __u32 flags; ++ __u32 msr; ++ __u64 blob_addr_32; ++ __u64 blob_addr_64; ++ __u8 blob_size_32; ++ __u8 blob_size_64; ++ __u8 pad2[30]; ++}; ++ ++struct kvm_xen_hvm_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u8 long_mode; ++ __u8 vector; ++ __u8 runstate_update_flag; ++ union { ++ __u64 gfn; ++#define KVM_XEN_INVALID_GFN ((__u64)-1) ++ __u64 hva; ++ } shared_info; ++ struct { ++ __u32 send_port; ++ 
__u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ ++ __u32 flags; ++#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) ++#define KVM_XEN_EVTCHN_UPDATE (1 << 1) ++#define KVM_XEN_EVTCHN_RESET (1 << 2) ++ /* ++ * Events sent by the guest are either looped back to ++ * the guest itself (potentially on a different port#) ++ * or signalled via an eventfd. ++ */ ++ union { ++ struct { ++ __u32 port; ++ __u32 vcpu; ++ __u32 priority; ++ } port; ++ struct { ++ __u32 port; /* Zero for eventfd */ ++ __s32 fd; ++ } eventfd; ++ __u32 padding[4]; ++ } deliver; ++ } evtchn; ++ __u32 xen_version; ++ __u64 pad[8]; ++ } u; ++}; ++ ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 ++#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ ++#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 ++#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ ++#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 ++ ++struct kvm_xen_vcpu_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u64 gpa; ++#define KVM_XEN_INVALID_GPA ((__u64)-1) ++ __u64 hva; ++ __u64 pad[8]; ++ struct { ++ __u64 state; ++ __u64 state_entry_time; ++ __u64 time_running; ++ __u64 time_runnable; ++ __u64 time_blocked; ++ __u64 time_offline; ++ } runstate; ++ __u32 vcpu_id; ++ struct { ++ __u32 port; ++ __u32 priority; ++ __u64 expires_ns; ++ } timer; ++ __u8 vector; ++ } u; ++}; ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 ++#define 
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 ++#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 ++#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 ++ ++/* Secure Encrypted Virtualization command */ ++enum sev_cmd_id { ++ /* Guest initialization commands */ ++ KVM_SEV_INIT = 0, ++ KVM_SEV_ES_INIT, ++ /* Guest launch commands */ ++ KVM_SEV_LAUNCH_START, ++ KVM_SEV_LAUNCH_UPDATE_DATA, ++ KVM_SEV_LAUNCH_UPDATE_VMSA, ++ KVM_SEV_LAUNCH_SECRET, ++ KVM_SEV_LAUNCH_MEASURE, ++ KVM_SEV_LAUNCH_FINISH, ++ /* Guest migration commands (outgoing) */ ++ KVM_SEV_SEND_START, ++ KVM_SEV_SEND_UPDATE_DATA, ++ KVM_SEV_SEND_UPDATE_VMSA, ++ KVM_SEV_SEND_FINISH, ++ /* Guest migration commands (incoming) */ ++ KVM_SEV_RECEIVE_START, ++ KVM_SEV_RECEIVE_UPDATE_DATA, ++ KVM_SEV_RECEIVE_UPDATE_VMSA, ++ KVM_SEV_RECEIVE_FINISH, ++ /* Guest status and debug commands */ ++ KVM_SEV_GUEST_STATUS, ++ KVM_SEV_DBG_DECRYPT, ++ KVM_SEV_DBG_ENCRYPT, ++ /* Guest certificates commands */ ++ KVM_SEV_CERT_EXPORT, ++ /* Attestation report */ ++ KVM_SEV_GET_ATTESTATION_REPORT, ++ /* Guest Migration Extension */ ++ KVM_SEV_SEND_CANCEL, ++ ++ /* Second time is the charm; improved versions of the above ioctls. 
*/ ++ KVM_SEV_INIT2, ++ ++ KVM_SEV_NR_MAX, ++}; ++ ++struct kvm_sev_cmd { ++ __u32 id; ++ __u32 pad0; ++ __u64 data; ++ __u32 error; ++ __u32 sev_fd; ++}; ++ ++struct kvm_sev_init { ++ __u64 vmsa_features; ++ __u32 flags; ++ __u32 pad[9]; ++}; ++ ++struct kvm_sev_launch_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 dh_uaddr; ++ __u32 dh_len; ++ __u32 pad0; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad1; ++}; ++ ++struct kvm_sev_launch_update_data { ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++ ++struct kvm_sev_launch_secret { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++struct kvm_sev_launch_measure { ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_guest_status { ++ __u32 handle; ++ __u32 policy; ++ __u32 state; ++}; ++ ++struct kvm_sev_dbg { ++ __u64 src_uaddr; ++ __u64 dst_uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_attestation_report { ++ __u8 mnonce[16]; ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_send_start { ++ __u32 policy; ++ __u32 pad0; ++ __u64 pdh_cert_uaddr; ++ __u32 pdh_cert_len; ++ __u32 pad1; ++ __u64 plat_certs_uaddr; ++ __u32 plat_certs_len; ++ __u32 pad2; ++ __u64 amd_certs_uaddr; ++ __u32 amd_certs_len; ++ __u32 pad3; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad4; ++}; ++ ++struct kvm_sev_send_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++struct kvm_sev_receive_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 pdh_uaddr; ++ __u32 pdh_len; ++ __u32 pad0; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad1; ++}; ++ ++struct kvm_sev_receive_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; 
++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) ++#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) ++ ++struct kvm_hyperv_eventfd { ++ __u32 conn_id; ++ __s32 fd; ++ __u32 flags; ++ __u32 padding[3]; ++}; ++ ++#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff ++#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) ++ + /* + * Masked event layout. + * Bits Description +@@ -547,10 +853,10 @@ struct kvm_pmu_event_filter { + ((__u64)(!!(exclude)) << 55)) + + #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ +- (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) +-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) +-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) +-#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) ++ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) ++#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) ++#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) ++#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) + #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + + /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +@@ -558,9 +864,11 @@ struct kvm_pmu_event_filter { + #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + + /* x86-specific KVM_EXIT_HYPERCALL flags. */ +-#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) ++#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) + + #define KVM_X86_DEFAULT_VM 0 + #define KVM_X86_SW_PROTECTED_VM 1 ++#define KVM_X86_SEV_VM 2 ++#define KVM_X86_SEV_ES_VM 3 + + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h +new file mode 100644 +index 0000000000..d9897771be +--- /dev/null ++++ b/linux-headers/linux/bits.h +@@ -0,0 +1,15 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* bits.h: Macros for dealing with bitmasks. 
*/ ++ ++#ifndef _LINUX_BITS_H ++#define _LINUX_BITS_H ++ ++#define __GENMASK(h, l) \ ++ (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ ++ (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) ++ ++#define __GENMASK_ULL(h, l) \ ++ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ ++ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) ++ ++#endif /* _LINUX_BITS_H */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 17839229b2..038731cdef 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -16,6 +16,11 @@ + + #define KVM_API_VERSION 12 + ++/* ++ * Backwards-compatible definitions. ++ */ ++#define __KVM_HAVE_GUEST_DEBUG ++ + /* for KVM_SET_USER_MEMORY_REGION */ + struct kvm_userspace_memory_region { + __u32 slot; +@@ -85,43 +90,6 @@ struct kvm_pit_config { + + #define KVM_PIT_SPEAKER_DUMMY 1 + +-struct kvm_s390_skeys { +- __u64 start_gfn; +- __u64 count; +- __u64 skeydata_addr; +- __u32 flags; +- __u32 reserved[9]; +-}; +- +-#define KVM_S390_CMMA_PEEK (1 << 0) +- +-/** +- * kvm_s390_cmma_log - Used for CMMA migration. +- * +- * Used both for input and output. +- * +- * @start_gfn: Guest page number to start from. +- * @count: Size of the result buffer. +- * @flags: Control operation mode via KVM_S390_CMMA_* flags +- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty +- * pages are still remaining. +- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set +- * in the PGSTE. +- * @values: Pointer to the values buffer. +- * +- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
+- */ +-struct kvm_s390_cmma_log { +- __u64 start_gfn; +- __u32 count; +- __u32 flags; +- union { +- __u64 remaining; +- __u64 mask; +- }; +- __u64 values; +-}; +- + struct kvm_hyperv_exit { + #define KVM_EXIT_HYPERV_SYNIC 1 + #define KVM_EXIT_HYPERV_HCALL 2 +@@ -313,11 +281,6 @@ struct kvm_run { + __u32 ipb; + } s390_sieic; + /* KVM_EXIT_S390_RESET */ +-#define KVM_S390_RESET_POR 1 +-#define KVM_S390_RESET_CLEAR 2 +-#define KVM_S390_RESET_SUBSYSTEM 4 +-#define KVM_S390_RESET_CPU_INIT 8 +-#define KVM_S390_RESET_IPL 16 + __u64 s390_reset_flags; + /* KVM_EXIT_S390_UCONTROL */ + struct { +@@ -532,43 +495,6 @@ struct kvm_translation { + __u8 pad[5]; + }; + +-/* for KVM_S390_MEM_OP */ +-struct kvm_s390_mem_op { +- /* in */ +- __u64 gaddr; /* the guest address */ +- __u64 flags; /* flags */ +- __u32 size; /* amount of bytes */ +- __u32 op; /* type of operation */ +- __u64 buf; /* buffer in userspace */ +- union { +- struct { +- __u8 ar; /* the access register number */ +- __u8 key; /* access key, ignored if flag unset */ +- __u8 pad1[6]; /* ignored */ +- __u64 old_addr; /* ignored if cmpxchg flag unset */ +- }; +- __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* ignored */ +- }; +-}; +-/* types for kvm_s390_mem_op->op */ +-#define KVM_S390_MEMOP_LOGICAL_READ 0 +-#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +-#define KVM_S390_MEMOP_SIDA_READ 2 +-#define KVM_S390_MEMOP_SIDA_WRITE 3 +-#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +-#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 +- +-/* flags for kvm_s390_mem_op->flags */ +-#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +-#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +- +-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) +- + /* for KVM_INTERRUPT */ + struct kvm_interrupt { + /* in */ 
+@@ -633,124 +559,6 @@ struct kvm_mp_state { + __u32 mp_state; + }; + +-struct kvm_s390_psw { +- __u64 mask; +- __u64 addr; +-}; +- +-/* valid values for type in kvm_s390_interrupt */ +-#define KVM_S390_SIGP_STOP 0xfffe0000u +-#define KVM_S390_PROGRAM_INT 0xfffe0001u +-#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +-#define KVM_S390_RESTART 0xfffe0003u +-#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +-#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +-#define KVM_S390_MCHK 0xfffe1000u +-#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +-#define KVM_S390_INT_CPU_TIMER 0xffff1005u +-#define KVM_S390_INT_VIRTIO 0xffff2603u +-#define KVM_S390_INT_SERVICE 0xffff2401u +-#define KVM_S390_INT_EMERGENCY 0xffff1201u +-#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +-/* Anything below 0xfffe0000u is taken by INT_IO */ +-#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ +- (((schid)) | \ +- ((ssid) << 16) | \ +- ((cssid) << 18) | \ +- ((ai) << 26)) +-#define KVM_S390_INT_IO_MIN 0x00000000u +-#define KVM_S390_INT_IO_MAX 0xfffdffffu +-#define KVM_S390_INT_IO_AI_MASK 0x04000000u +- +- +-struct kvm_s390_interrupt { +- __u32 type; +- __u32 parm; +- __u64 parm64; +-}; +- +-struct kvm_s390_io_info { +- __u16 subchannel_id; +- __u16 subchannel_nr; +- __u32 io_int_parm; +- __u32 io_int_word; +-}; +- +-struct kvm_s390_ext_info { +- __u32 ext_params; +- __u32 pad; +- __u64 ext_params2; +-}; +- +-struct kvm_s390_pgm_info { +- __u64 trans_exc_code; +- __u64 mon_code; +- __u64 per_address; +- __u32 data_exc_code; +- __u16 code; +- __u16 mon_class_nr; +- __u8 per_code; +- __u8 per_atmid; +- __u8 exc_access_id; +- __u8 per_access_id; +- __u8 op_access_id; +-#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +-#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +-#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +-#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +-#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 +- __u8 flags; +- __u8 pad[2]; +-}; +- +-struct kvm_s390_prefix_info { +- __u32 address; +-}; +- +-struct kvm_s390_extcall_info { +- __u16 code; 
+-}; +- +-struct kvm_s390_emerg_info { +- __u16 code; +-}; +- +-#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +-struct kvm_s390_stop_info { +- __u32 flags; +-}; +- +-struct kvm_s390_mchk_info { +- __u64 cr14; +- __u64 mcic; +- __u64 failing_storage_address; +- __u32 ext_damage_code; +- __u32 pad; +- __u8 fixed_logout[16]; +-}; +- +-struct kvm_s390_irq { +- __u64 type; +- union { +- struct kvm_s390_io_info io; +- struct kvm_s390_ext_info ext; +- struct kvm_s390_pgm_info pgm; +- struct kvm_s390_emerg_info emerg; +- struct kvm_s390_extcall_info extcall; +- struct kvm_s390_prefix_info prefix; +- struct kvm_s390_stop_info stop; +- struct kvm_s390_mchk_info mchk; +- char reserved[64]; +- } u; +-}; +- +-struct kvm_s390_irq_state { +- __u64 buf; +- __u32 flags; /* will stay unused for compatibility reasons */ +- __u32 len; +- __u32 reserved[4]; /* will stay unused for compatibility reasons */ +-}; +- + /* for KVM_SET_GUEST_DEBUG */ + + #define KVM_GUESTDBG_ENABLE 0x00000001 +@@ -806,50 +614,6 @@ struct kvm_enable_cap { + __u8 pad[64]; + }; + +-/* for KVM_PPC_GET_PVINFO */ +- +-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) +- +-struct kvm_ppc_pvinfo { +- /* out */ +- __u32 flags; +- __u32 hcall[4]; +- __u8 pad[108]; +-}; +- +-/* for KVM_PPC_GET_SMMU_INFO */ +-#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 +- +-struct kvm_ppc_one_page_size { +- __u32 page_shift; /* Page shift (or 0) */ +- __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +-}; +- +-struct kvm_ppc_one_seg_page_size { +- __u32 page_shift; /* Base page shift of segment (or 0) */ +- __u32 slb_enc; /* SLB encoding for BookS */ +- struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +-}; +- +-#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +-#define KVM_PPC_1T_SEGMENTS 0x00000002 +-#define KVM_PPC_NO_HASH 0x00000004 +- +-struct kvm_ppc_smmu_info { +- __u64 flags; +- __u32 slb_size; +- __u16 data_keys; /* # storage keys supported for data */ +- __u16 instr_keys; /* # storage keys supported for instructions */ +- struct 
kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +-}; +- +-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +-struct kvm_ppc_resize_hpt { +- __u64 flags; +- __u32 shift; +- __u32 pad; +-}; +- + #define KVMIO 0xAE + + /* machine type bits, to be used as argument to KVM_CREATE_VM */ +@@ -919,9 +683,7 @@ struct kvm_ppc_resize_hpt { + /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ + #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 + #define KVM_CAP_USER_NMI 22 +-#ifdef __KVM_HAVE_GUEST_DEBUG + #define KVM_CAP_SET_GUEST_DEBUG 23 +-#endif + #ifdef __KVM_HAVE_PIT + #define KVM_CAP_REINJECT_CONTROL 24 + #endif +@@ -1152,8 +914,6 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_GUEST_MEMFD 234 + #define KVM_CAP_VM_TYPES 235 + +-#ifdef KVM_CAP_IRQ_ROUTING +- + struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +@@ -1218,42 +978,6 @@ struct kvm_irq_routing { + struct kvm_irq_routing_entry entries[]; + }; + +-#endif +- +-#ifdef KVM_CAP_MCE +-/* x86 MCE */ +-struct kvm_x86_mce { +- __u64 status; +- __u64 addr; +- __u64 misc; +- __u64 mcg_status; +- __u8 bank; +- __u8 pad1[7]; +- __u64 pad2[3]; +-}; +-#endif +- +-#ifdef KVM_CAP_XEN_HVM +-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +-#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +-#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +- +-struct kvm_xen_hvm_config { +- __u32 flags; +- __u32 msr; +- __u64 blob_addr_32; +- __u64 blob_addr_64; +- __u8 blob_size_32; +- __u8 blob_size_64; +- __u8 pad2[30]; +-}; +-#endif +- + #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) + /* + * Available with KVM_CAP_IRQFD_RESAMPLE +@@ -1438,11 +1162,6 @@ struct kvm_vfio_spapr_tce { + struct kvm_userspace_memory_region2) + + /* enable ucontrol for s390 */ +-struct 
kvm_s390_ucas_mapping { +- __u64 user_addr; +- __u64 vcpu_addr; +- __u64 length; +-}; + #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) + #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) + #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) +@@ -1637,89 +1356,6 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-struct kvm_s390_pv_sec_parm { +- __u64 origin; +- __u64 length; +-}; +- +-struct kvm_s390_pv_unp { +- __u64 addr; +- __u64 size; +- __u64 tweak; +-}; +- +-enum pv_cmd_dmp_id { +- KVM_PV_DUMP_INIT, +- KVM_PV_DUMP_CONFIG_STOR_STATE, +- KVM_PV_DUMP_COMPLETE, +- KVM_PV_DUMP_CPU, +-}; +- +-struct kvm_s390_pv_dmp { +- __u64 subcmd; +- __u64 buff_addr; +- __u64 buff_len; +- __u64 gaddr; /* For dump storage state */ +- __u64 reserved[4]; +-}; +- +-enum pv_cmd_info_id { +- KVM_PV_INFO_VM, +- KVM_PV_INFO_DUMP, +-}; +- +-struct kvm_s390_pv_info_dump { +- __u64 dump_cpu_buffer_len; +- __u64 dump_config_mem_buffer_per_1m; +- __u64 dump_config_finalize_len; +-}; +- +-struct kvm_s390_pv_info_vm { +- __u64 inst_calls_list[4]; +- __u64 max_cpus; +- __u64 max_guests; +- __u64 max_guest_addr; +- __u64 feature_indication; +-}; +- +-struct kvm_s390_pv_info_header { +- __u32 id; +- __u32 len_max; +- __u32 len_written; +- __u32 reserved; +-}; +- +-struct kvm_s390_pv_info { +- struct kvm_s390_pv_info_header header; +- union { +- struct kvm_s390_pv_info_dump dump; +- struct kvm_s390_pv_info_vm vm; +- }; +-}; +- +-enum pv_cmd_id { +- KVM_PV_ENABLE, +- KVM_PV_DISABLE, +- KVM_PV_SET_SEC_PARMS, +- KVM_PV_UNPACK, +- KVM_PV_VERIFY, +- KVM_PV_PREP_RESET, +- KVM_PV_UNSHARE_ALL, +- KVM_PV_INFO, +- KVM_PV_DUMP, +- KVM_PV_ASYNC_CLEANUP_PREPARE, +- KVM_PV_ASYNC_CLEANUP_PERFORM, +-}; +- +-struct kvm_pv_cmd { +- __u32 cmd; /* Command to be executed */ +- __u16 rc; /* Ultravisor return code */ +- __u16 rrc; /* Ultravisor return reason code */ +- __u64 data; /* 
Data or address */ +- __u32 flags; /* flags for future extensions. Must be 0 for now */ +- __u32 reserved[3]; +-}; +- + /* Available with KVM_CAP_S390_PROTECTED */ + #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) + +@@ -1733,58 +1369,6 @@ struct kvm_pv_cmd { + #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) + #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +-struct kvm_xen_hvm_attr { +- __u16 type; +- __u16 pad[3]; +- union { +- __u8 long_mode; +- __u8 vector; +- __u8 runstate_update_flag; +- struct { +- __u64 gfn; +-#define KVM_XEN_INVALID_GFN ((__u64)-1) +- } shared_info; +- struct { +- __u32 send_port; +- __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ +- __u32 flags; +-#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +-#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +-#define KVM_XEN_EVTCHN_RESET (1 << 2) +- /* +- * Events sent by the guest are either looped back to +- * the guest itself (potentially on a different port#) +- * or signalled via an eventfd. 
+- */ +- union { +- struct { +- __u32 port; +- __u32 vcpu; +- __u32 priority; +- } port; +- struct { +- __u32 port; /* Zero for eventfd */ +- __s32 fd; +- } eventfd; +- __u32 padding[4]; +- } deliver; +- } evtchn; +- __u32 xen_version; +- __u64 pad[8]; +- } u; +-}; +- +- +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +-#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +-#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +-#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +-#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +- + /* Per-vCPU Xen attributes */ + #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) + #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +@@ -1795,242 +1379,6 @@ struct kvm_xen_hvm_attr { + #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) + #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + +-struct kvm_xen_vcpu_attr { +- __u16 type; +- __u16 pad[3]; +- union { +- __u64 gpa; +-#define KVM_XEN_INVALID_GPA ((__u64)-1) +- __u64 pad[8]; +- struct { +- __u64 state; +- __u64 state_entry_time; +- __u64 time_running; +- __u64 time_runnable; +- __u64 time_blocked; +- __u64 time_offline; +- } runstate; +- __u32 vcpu_id; +- struct { +- __u32 port; +- __u32 priority; +- __u64 expires_ns; +- } timer; +- __u8 vector; +- } u; +-}; +- +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +-/* Available with KVM_CAP_XEN_HVM / 
KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +- +-/* Secure Encrypted Virtualization command */ +-enum sev_cmd_id { +- /* Guest initialization commands */ +- KVM_SEV_INIT = 0, +- KVM_SEV_ES_INIT, +- /* Guest launch commands */ +- KVM_SEV_LAUNCH_START, +- KVM_SEV_LAUNCH_UPDATE_DATA, +- KVM_SEV_LAUNCH_UPDATE_VMSA, +- KVM_SEV_LAUNCH_SECRET, +- KVM_SEV_LAUNCH_MEASURE, +- KVM_SEV_LAUNCH_FINISH, +- /* Guest migration commands (outgoing) */ +- KVM_SEV_SEND_START, +- KVM_SEV_SEND_UPDATE_DATA, +- KVM_SEV_SEND_UPDATE_VMSA, +- KVM_SEV_SEND_FINISH, +- /* Guest migration commands (incoming) */ +- KVM_SEV_RECEIVE_START, +- KVM_SEV_RECEIVE_UPDATE_DATA, +- KVM_SEV_RECEIVE_UPDATE_VMSA, +- KVM_SEV_RECEIVE_FINISH, +- /* Guest status and debug commands */ +- KVM_SEV_GUEST_STATUS, +- KVM_SEV_DBG_DECRYPT, +- KVM_SEV_DBG_ENCRYPT, +- /* Guest certificates commands */ +- KVM_SEV_CERT_EXPORT, +- /* Attestation report */ +- KVM_SEV_GET_ATTESTATION_REPORT, +- /* Guest Migration Extension */ +- KVM_SEV_SEND_CANCEL, +- +- KVM_SEV_NR_MAX, +-}; +- +-struct kvm_sev_cmd { +- __u32 id; +- __u64 data; +- __u32 error; +- __u32 sev_fd; +-}; +- +-struct kvm_sev_launch_start { +- __u32 handle; +- __u32 policy; +- __u64 dh_uaddr; +- __u32 dh_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_launch_update_data { +- __u64 uaddr; +- __u32 len; +-}; +- +- +-struct kvm_sev_launch_secret { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-struct kvm_sev_launch_measure { +- __u64 uaddr; +- __u32 len; +-}; +- +-struct kvm_sev_guest_status { +- __u32 handle; +- __u32 policy; +- __u32 state; +-}; +- +-struct kvm_sev_dbg { +- __u64 src_uaddr; +- __u64 dst_uaddr; +- __u32 len; +-}; +- +-struct kvm_sev_attestation_report { +- __u8 mnonce[16]; +- __u64 uaddr; +- __u32 len; +-}; +- 
+-struct kvm_sev_send_start { +- __u32 policy; +- __u64 pdh_cert_uaddr; +- __u32 pdh_cert_len; +- __u64 plat_certs_uaddr; +- __u32 plat_certs_len; +- __u64 amd_certs_uaddr; +- __u32 amd_certs_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_send_update_data { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-struct kvm_sev_receive_start { +- __u32 handle; +- __u32 policy; +- __u64 pdh_uaddr; +- __u32 pdh_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_receive_update_data { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +- +-struct kvm_assigned_pci_dev { +- __u32 assigned_dev_id; +- __u32 busnr; +- __u32 devfn; +- __u32 flags; +- __u32 segnr; +- union { +- __u32 reserved[11]; +- }; +-}; +- +-#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +-#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) +- +-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) +- +-#define KVM_DEV_IRQ_HOST_MASK 0x00ff +-#define KVM_DEV_IRQ_GUEST_MASK 0xff00 +- +-struct kvm_assigned_irq { +- __u32 assigned_dev_id; +- __u32 host_irq; /* ignored (legacy field) */ +- __u32 guest_irq; +- __u32 flags; +- union { +- __u32 reserved[12]; +- }; +-}; +- +-struct kvm_assigned_msix_nr { +- __u32 assigned_dev_id; +- __u16 entry_nr; +- __u16 padding; +-}; +- +-#define KVM_MAX_MSIX_PER_DEV 256 +-struct kvm_assigned_msix_entry { +- __u32 assigned_dev_id; +- __u32 gsi; +- __u16 entry; /* The index of entry in the MSI-X table */ +- __u16 padding[3]; +-}; +- +-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +- +-/* 
Available with KVM_CAP_ARM_USER_IRQ */ +- +-/* Bits for run->s.regs.device_irq_level */ +-#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +-#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +-#define KVM_ARM_DEV_PMU (1 << 2) +- +-struct kvm_hyperv_eventfd { +- __u32 conn_id; +- __s32 fd; +- __u32 flags; +- __u32 padding[3]; +-}; +- +-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) +- + #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) + #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) + +@@ -2176,33 +1524,6 @@ struct kvm_stats_desc { + /* Available with KVM_CAP_S390_ZPCI_OP */ + #define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) + +-struct kvm_s390_zpci_op { +- /* in */ +- __u32 fh; /* target device */ +- __u8 op; /* operation to perform */ +- __u8 pad[3]; +- union { +- /* for KVM_S390_ZPCIOP_REG_AEN */ +- struct { +- __u64 ibv; /* Guest addr of interrupt bit vector */ +- __u64 sb; /* Guest addr of summary bit */ +- __u32 flags; +- __u32 noi; /* Number of interrupts */ +- __u8 isc; /* Guest interrupt subclass */ +- __u8 sbo; /* Offset of guest summary bit vector */ +- __u16 pad; +- } reg_aen; +- __u64 reserved[8]; +- } u; +-}; +- +-/* types for kvm_s390_zpci_op->op */ +-#define KVM_S390_ZPCIOP_REG_AEN 0 +-#define KVM_S390_ZPCIOP_DEREG_AEN 1 +- +-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +-#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +- + /* Available with KVM_CAP_MEMORY_ATTRIBUTES */ + #define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h +index bcb21339ee..c3046c6bff 100644 +--- a/linux-headers/linux/psp-sev.h ++++ b/linux-headers/linux/psp-sev.h +@@ -28,6 +28,9 @@ enum { + SEV_PEK_CERT_IMPORT, + SEV_GET_ID, /* This command is deprecated, use SEV_GET_ID2 */ + SEV_GET_ID2, ++ SNP_PLATFORM_STATUS, ++ SNP_COMMIT, ++ SNP_SET_CONFIG, + + SEV_MAX, + }; +@@ -69,6 +72,12 @@ typedef enum { + SEV_RET_RESOURCE_LIMIT, + 
SEV_RET_SECURE_DATA_INVALID, + SEV_RET_INVALID_KEY = 0x27, ++ SEV_RET_INVALID_PAGE_SIZE, ++ SEV_RET_INVALID_PAGE_STATE, ++ SEV_RET_INVALID_MDATA_ENTRY, ++ SEV_RET_INVALID_PAGE_OWNER, ++ SEV_RET_INVALID_PAGE_AEAD_OFLOW, ++ SEV_RET_RMP_INIT_REQUIRED, + SEV_RET_MAX, + } sev_ret_code; + +@@ -155,6 +164,56 @@ struct sev_user_data_get_id2 { + __u32 length; /* In/Out */ + } __attribute__((packed)); + ++/** ++ * struct sev_user_data_snp_status - SNP status ++ * ++ * @api_major: API major version ++ * @api_minor: API minor version ++ * @state: current platform state ++ * @is_rmp_initialized: whether RMP is initialized or not ++ * @rsvd: reserved ++ * @build_id: firmware build id for the API version ++ * @mask_chip_id: whether chip id is present in attestation reports or not ++ * @mask_chip_key: whether attestation reports are signed or not ++ * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded ++ * @rsvd1: reserved ++ * @guest_count: the number of guest currently managed by the firmware ++ * @current_tcb_version: current TCB version ++ * @reported_tcb_version: reported TCB version ++ */ ++struct sev_user_data_snp_status { ++ __u8 api_major; /* Out */ ++ __u8 api_minor; /* Out */ ++ __u8 state; /* Out */ ++ __u8 is_rmp_initialized:1; /* Out */ ++ __u8 rsvd:7; ++ __u32 build_id; /* Out */ ++ __u32 mask_chip_id:1; /* Out */ ++ __u32 mask_chip_key:1; /* Out */ ++ __u32 vlek_en:1; /* Out */ ++ __u32 rsvd1:29; ++ __u32 guest_count; /* Out */ ++ __u64 current_tcb_version; /* Out */ ++ __u64 reported_tcb_version; /* Out */ ++} __attribute__((packed)); ++ ++/** ++ * struct sev_user_data_snp_config - system wide configuration value for SNP. ++ * ++ * @reported_tcb: the TCB version to report in the guest attestation report. 
++ * @mask_chip_id: whether chip id is present in attestation reports or not ++ * @mask_chip_key: whether attestation reports are signed or not ++ * @rsvd: reserved ++ * @rsvd1: reserved ++ */ ++struct sev_user_data_snp_config { ++ __u64 reported_tcb ; /* In */ ++ __u32 mask_chip_id:1; /* In */ ++ __u32 mask_chip_key:1; /* In */ ++ __u32 rsvd:30; /* In */ ++ __u8 rsvd1[52]; ++} __attribute__((packed)); ++ + /** + * struct sev_issue_cmd - SEV ioctl parameters + * +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 649560c685..bea6973906 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -227,4 +227,11 @@ + */ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) ++ ++/* Get the queue size of a specific virtqueue. ++ * userspace set the vring index in vhost_vring_state.index ++ * kernel set the queue size in vhost_vring_state.num ++ */ ++#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ ++ struct vhost_vring_state) + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch b/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch deleted file mode 100644 index c1100a5..0000000 --- a/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/13] 02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 6d0589e0e6c64b888864a2bf980537be20389264 -Author: Alexander Bulekov -Date: Sat May 6 07:21:45 2023 
-0400 - - loongarch: mark loongarch_ipi_iocsr re-entrnacy safe - - loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send - function. As such, mark these MRs re-entrancy-safe. - - Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues") - Signed-off-by: Alexander Bulekov - Reviewed-by: Song Gao - Message-Id: <20230506112145.3563708-1-alxndr@bu.edu> - Signed-off-by: Song Gao - -Signed-off-by: Jon Maloy ---- - hw/intc/loongarch_ipi.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c -index aa4bf9eb74..40e98af2ce 100644 ---- a/hw/intc/loongarch_ipi.c -+++ b/hw/intc/loongarch_ipi.c -@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj) - for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) { - memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops, - &lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48); -+ -+ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ -+ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true; -+ - sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]); - - memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops, --- -2.39.3 - diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch deleted file mode 100644 index 359d53f..0000000 --- a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 29 May 2023 14:21:08 -0400 -Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO - region, too - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged 
-CVE: CVE-2023-2680 - -commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e -Author: Thomas Huth -Date: Tue May 16 11:05:56 2023 +0200 - - lsi53c895a: disable reentrancy detection for MMIO region, too - - While trying to use a SCSI disk on the LSI controller with an - older version of Fedora (25), I'm getting: - - qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34 - - and the SCSI controller is not usable. Seems like we have to - disable the reentrancy checker for the MMIO region, too, to - get this working again. - - The problem could be reproduced it like this: - - ./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \ - -device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \ - -drive if=none,id=d0,file=.../somedisk.qcow2 \ - -cdrom Fedora-Everything-netinst-i386-25-1.3.iso - - Where somedisk.qcow2 is an image that contains already some partitions - and file systems. - - In the boot menu of Fedora, go to - "Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell" - - Then check "dmesg | grep -i 53c" for failure messages, and try to mount - a partition from somedisk.qcow2. - - Message-Id: <20230516090556.553813-1-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index db27872963..048436352b 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) - * re-entrancy guard. 
- */ - s->ram_io.disable_reentrancy_guard = true; -+ s->mmio_io.disable_reentrancy_guard = true; - - address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); - qdev_init_gpio_out(d, &s->ext_irq, 1); --- -2.39.3 - diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch deleted file mode 100644 index e671c92..0000000 --- a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:10 2023 -0400 - - lsi53c895a: disable reentrancy detection for script RAM - - As the code is designed to use the memory APIs to access the script ram, - disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion. - - In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion. 
- - Reported-by: Fiona Ebner - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-6-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index af93557a9a..db27872963 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) - memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, - "lsi-io", 256); - -+ /* -+ * Since we use the address-space API to interact with ram_io, disable the -+ * re-entrancy guard. -+ */ -+ s->ram_io.disable_reentrancy_guard = true; -+ - address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); - qdev_init_gpio_out(d, &s->ext_irq, 1); - --- -2.39.3 - diff --git a/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch b/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch new file mode 100644 index 0000000..6524d6a --- /dev/null +++ b/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch @@ -0,0 +1,71 @@ +From 9f485c8df885bcd1ff6c5692463c6168bfec07fb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 13:29:53 +0200 +Subject: [PATCH 054/100] machine: allow early use of + machine_require_guest_memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [54/91] fd8c1a6624d5f27268215c8aa70dfc9d37bdb981 (bonzini/rhel-qemu-kvm) + +Ask the ConfidentialGuestSupport object whether to use guest_memfd +for KVM-backend private memory. This bool can be set in instance_init +(or user_complete) so that it is available when the machine is created. 
+ +Signed-off-by: Paolo Bonzini +(cherry picked from commit dc0d28ca46c0e7ee3c055ad4da24022995bd3765) +Signed-off-by: Paolo Bonzini +--- + hw/core/machine.c | 2 +- + include/exec/confidential-guest-support.h | 5 +++++ + include/hw/boards.h | 1 - + 3 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 07b994e136..2055e0d312 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1482,7 +1482,7 @@ bool machine_mem_merge(MachineState *machine) + + bool machine_require_guest_memfd(MachineState *machine) + { +- return machine->require_guest_memfd; ++ return machine->cgs && machine->cgs->require_guest_memfd; + } + + static char *cpu_slot_to_string(const CPUArchId *cpu) +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index e5b188cffb..02dc4e518f 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -31,6 +31,11 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, + struct ConfidentialGuestSupport { + Object parent; + ++ /* ++ * True if the machine should use guest_memfd for RAM. 
++ */ ++ bool require_guest_memfd; ++ + /* + * ready: flag set by CGS initialization code once it's ready to + * start executing instructions in a potentially-secure +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 815a1c4b26..0d1f9533ef 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -373,7 +373,6 @@ struct MachineState { + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; +- bool require_guest_memfd; + bool usb; + bool usb_disabled; + char *firmware; +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch b/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch new file mode 100644 index 0000000..538e82d --- /dev/null +++ b/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch @@ -0,0 +1,85 @@ +From 331c58d87dde8b4757e1d1e09d9b16bac2952d22 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 30 May 2024 06:16:15 -0500 +Subject: [PATCH 081/100] memory: Introduce + memory_region_init_ram_guest_memfd() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [81/91] d5b0898d791f3f90d1acda0230f96ca9bf5be5e4 (bonzini/rhel-qemu-kvm) + +Introduce memory_region_init_ram_guest_memfd() to allocate private +guset memfd on the MemoryRegion initialization. It's for the use case of +TDVF, which must be private on TDX case. 
+ +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-4-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a0aa6db7ce72a08703774107185e639e73e7754c) +Signed-off-by: Paolo Bonzini +--- + include/exec/memory.h | 6 ++++++ + system/memory.c | 24 ++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 679a847685..1e351f6fc8 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1603,6 +1603,12 @@ bool memory_region_init_ram(MemoryRegion *mr, + uint64_t size, + Error **errp); + ++bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, ++ Object *owner, ++ const char *name, ++ uint64_t size, ++ Error **errp); ++ + /** + * memory_region_init_rom: Initialize a ROM memory region. + * +diff --git a/system/memory.c b/system/memory.c +index c756950c0c..b09065eef3 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3606,6 +3606,30 @@ bool memory_region_init_ram(MemoryRegion *mr, + return true; + } + ++bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, ++ Object *owner, ++ const char *name, ++ uint64_t size, ++ Error **errp) ++{ ++ DeviceState *owner_dev; ++ ++ if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size, ++ RAM_GUEST_MEMFD, errp)) { ++ return false; ++ } ++ /* This will assert if owner is neither NULL nor a DeviceState. ++ * We only want the owner here for the purposes of defining a ++ * unique name for migration. TODO: Ideally we should implement ++ * a naming scheme for Objects which are not DeviceStates, in ++ * which case we can relax this restriction. 
++ */ ++ owner_dev = DEVICE(owner); ++ vmstate_register_ram(mr, owner_dev); ++ ++ return true; ++} ++ + bool memory_region_init_rom(MemoryRegion *mr, + Object *owner, + const char *name, +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch b/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch deleted file mode 100644 index d3697dc..0000000 --- a/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch +++ /dev/null @@ -1,150 +0,0 @@ -From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 05/21] memory: prevent dma-reentracy issues - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 -CVE: CVE-2023-0330 - -commit a2e1753b8054344f32cf94f31c6399a58794a380 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:06 2023 -0400 - - memory: prevent dma-reentracy issues - - Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. - This flag is set/checked prior to calling a device's MemoryRegion - handlers, and set when device code initiates DMA. The purpose of this - flag is to prevent two types of DMA-based reentrancy issues: - - 1.) mmio -> dma -> mmio case - 2.) bh -> dma write -> mmio case - - These issues have led to problems such as stack-exhaustion and - use-after-frees. 
- - Summary of the problem from Peter Maydell: - https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com - - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 - Resolves: CVE-2023-0330 - - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> - [thuth: Replace warn_report() with warn_report_once()] - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - include/exec/memory.h | 5 +++++ - include/hw/qdev-core.h | 7 +++++++ - softmmu/memory.c | 16 ++++++++++++++++ - 3 files changed, 28 insertions(+) - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 15ade918ba..e45ce6061f 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -767,6 +767,8 @@ struct MemoryRegion { - bool is_iommu; - RAMBlock *ram_block; - Object *owner; -+ /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ -+ DeviceState *dev; - - const MemoryRegionOps *ops; - void *opaque; -@@ -791,6 +793,9 @@ struct MemoryRegion { - unsigned ioeventfd_nb; - MemoryRegionIoeventfd *ioeventfds; - RamDiscardManager *rdm; /* Only for RAM */ -+ -+ /* For devices designed to perform re-entrant IO into their own IO MRs */ -+ bool disable_reentrancy_guard; - }; - - struct IOMMUMemoryRegion { -diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h -index bd50ad5ee1..7623703943 100644 ---- a/include/hw/qdev-core.h -+++ b/include/hw/qdev-core.h -@@ -162,6 +162,10 @@ struct NamedClockList { - QLIST_ENTRY(NamedClockList) node; - }; - -+typedef struct { -+ bool engaged_in_io; -+} MemReentrancyGuard; -+ - /** - * DeviceState: - * @realized: Indicates whether the device has been fully constructed. -@@ -194,6 +198,9 @@ struct DeviceState { - int alias_required_for_version; - ResettableState reset; - GSList *unplug_blockers; -+ -+ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ -+ MemReentrancyGuard mem_reentrancy_guard; - }; - - struct DeviceListener { -diff --git a/softmmu/memory.c b/softmmu/memory.c -index b1a6cae6f5..b7b3386e9d 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_size_max = 4; - } - -+ /* Do not allow more than one simultaneous access to a device's IO Regions */ -+ if (mr->dev && !mr->disable_reentrancy_guard && -+ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { -+ if (mr->dev->mem_reentrancy_guard.engaged_in_io) { -+ warn_report_once("Blocked re-entrant IO on MemoryRegion: " -+ "%s at addr: 0x%" HWADDR_PRIX, -+ memory_region_name(mr), addr); -+ return MEMTX_ACCESS_ERROR; -+ } -+ mr->dev->mem_reentrancy_guard.engaged_in_io = true; -+ } -+ - /* FIXME: support unaligned access? 
*/ - access_size = MAX(MIN(size, access_size_max), access_size_min); - access_mask = MAKE_64BIT_MASK(0, access_size * 8); -@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_mask, attrs); - } - } -+ if (mr->dev) { -+ mr->dev->mem_reentrancy_guard.engaged_in_io = false; -+ } - return r; - } - -@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, - } - mr->name = g_strdup(name); - mr->owner = owner; -+ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); - mr->ram_block = NULL; - - if (name) { --- -2.39.3 - diff --git a/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch b/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch deleted file mode 100644 index f45abea..0000000 --- a/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 7 Jun 2023 11:45:09 -0400 -Subject: [PATCH 15/21] memory: stricter checks prior to unsetting - engaged_in_io - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3 -Author: Alexander Bulekov -Date: Tue May 16 04:40:02 2023 -0400 - - memory: stricter checks prior to unsetting engaged_in_io - - engaged_in_io could be unset by an MR with re-entrancy checks disabled. - Ensure that only MRs that can set the engaged_in_io flag can unset it. 
- - Signed-off-by: Alexander Bulekov - Message-Id: <20230516084002.3813836-1-alxndr@bu.edu> - Reviewed-by: Darren Kenny - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - softmmu/memory.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/softmmu/memory.c b/softmmu/memory.c -index b7b3386e9d..26424f1d78 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - unsigned access_size; - unsigned i; - MemTxResult r = MEMTX_OK; -+ bool reentrancy_guard_applied = false; - - if (!access_size_min) { - access_size_min = 1; -@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - return MEMTX_ACCESS_ERROR; - } - mr->dev->mem_reentrancy_guard.engaged_in_io = true; -+ reentrancy_guard_applied = true; - } - - /* FIXME: support unaligned access? */ -@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_mask, attrs); - } - } -- if (mr->dev) { -+ if (mr->dev && reentrancy_guard_applied) { - mr->dev->mem_reentrancy_guard.engaged_in_io = false; - } - return r; --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Add-switchover-ack-capability.patch b/SOURCES/kvm-migration-Add-switchover-ack-capability.patch deleted file mode 100644 index 399c9ed..0000000 --- a/SOURCES/kvm-migration-Add-switchover-ack-capability.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 8f89d3bc8f226cd038bf88b9fb3ef43b0fb33034 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 10/37] migration: Add switchover ack capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/28] 2f4ca020783bd617eca13b18289fce764279833b (clegoate/qemu-kvm-c9s) - -Bugzilla: 
https://bugzilla.redhat.com/2192818 - -commit 6574232fff6a -Author: Avihai Horon -Date: Wed Jun 21 14:11:54 2023 +0300 - - migration: Add switchover ack capability - - Migration downtime estimation is calculated based on bandwidth and - remaining migration data. This assumes that loading of migration data in - the destination takes a negligible amount of time and that downtime - depends only on network speed. - - While this may be true for RAM, it's not necessarily true for other - migrated devices. For example, loading the data of a VFIO device in the - destination might require from the device to allocate resources, prepare - internal data structures and so on. These operations can take a - significant amount of time which can increase migration downtime. - - This patch adds a new capability "switchover ack" that prevents the - source from stopping the VM and completing the migration until an ACK - is received from the destination that it's OK to do so. - - This can be used by migrated devices in various ways to reduce downtime. - For example, a device can send initial precopy metadata to pre-allocate - resources in the destination and use this capability to make sure that - the pre-allocation is completed before the source VM is stopped, so it - will have full effect. - - This new capability relies on the return path capability to communicate - from the destination back to the source. - - The actual implementation of the capability will be added in the - following patches. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Peter Xu - Acked-by: Markus Armbruster - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - qapi/migration.json - re-indent of @switchover-ack to avoid ../qapi/migration.json:482:1: - unexpected de-indent (expected at least 17 spaces) - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 21 +++++++++++++++++++++ - migration/options.h | 1 + - qapi/migration.json | 14 +++++++++++++- - 3 files changed, 35 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index a76984276d..c3df6c6dde 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -182,6 +182,8 @@ Property migration_properties[] = { - DEFINE_PROP_MIG_CAP("x-zero-copy-send", - MIGRATION_CAPABILITY_ZERO_COPY_SEND), - #endif -+ DEFINE_PROP_MIG_CAP("x-switchover-ack", -+ MIGRATION_CAPABILITY_SWITCHOVER_ACK), - - DEFINE_PROP_END_OF_LIST(), - }; -@@ -305,6 +307,13 @@ bool migrate_return_path(void) - return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - -+bool migrate_switchover_ack(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK]; -+} -+ - bool migrate_validate_uuid(void) - { - MigrationState *s = migrate_get_current(); -@@ -532,6 +541,18 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - } - } - -+ if (new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK]) { -+ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) { -+ error_setg(errp, "Capability 'switchover-ack' requires capability " -+ "'return-path'"); -+ return false; -+ } -+ -+ /* Disable this capability until it's implemented */ -+ error_setg(errp, "'switchover-ack' is not implemented yet"); -+ return false; -+ } -+ - return true; - } - -diff --git a/migration/options.h b/migration/options.h -index 7b0f7245ad..0fc7be6869 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -47,6 +47,7 @@ bool 
migrate_postcopy_ram(void); - bool migrate_rdma_pin_all(void); - bool migrate_release_ram(void); - bool migrate_return_path(void); -+bool migrate_switchover_ack(void); - bool migrate_validate_uuid(void); - bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); -diff --git a/qapi/migration.json b/qapi/migration.json -index 2c35b7b9cf..b6a58347cc 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -478,6 +478,18 @@ - # should not affect the correctness of postcopy migration. - # (since 7.1) - # -+# @switchover-ack: If enabled, migration will not stop the source VM -+# and complete the migration until an ACK is received -+# from the destination that it's OK to do so. -+# Exactly when this ACK is sent depends on the -+# migrated devices that use this feature. For -+# example, a device can use it to make sure some of -+# its data is sent and loaded in the destination -+# before doing switchover. This can reduce downtime -+# if devices that support this capability are -+# present. 'return-path' capability must be enabled -+# to use it. (since 8.1) -+# - # Features: - # @unstable: Members @x-colo and @x-ignore-shared are experimental. 
- # -@@ -492,7 +504,7 @@ - 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', - { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, - 'validate-uuid', 'background-snapshot', -- 'zero-copy-send', 'postcopy-preempt'] } -+ 'zero-copy-send', 'postcopy-preempt', 'switchover-ack'] } - - ## - # @MigrationCapabilityStatus: --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch b/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch deleted file mode 100644 index 7c9748b..0000000 --- a/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch +++ /dev/null @@ -1,308 +0,0 @@ -From e2c2910edf90186ca0d7d13c9943caa284e95ea9 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 25 Apr 2023 21:15:14 -0400 -Subject: [PATCH 51/56] migration: Allow postcopy_ram_supported_by_host() to - report err -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [50/50] 08c44affc11c27ddf1aa7ce0dfacbaf5effb80cb (peterx/qemu-kvm) - -Instead of print it to STDERR, bring the error upwards so that it can be -reported via QMP responses. 
- -E.g.: - -{ "execute": "migrate-set-capabilities" , - "arguments": { "capabilities": - [ { "capability": "postcopy-ram", "state": true } ] } } - -{ "error": - { "class": "GenericError", - "desc": "Postcopy is not supported: Host backend files need to be TMPFS - or HUGETLBFS only" } } - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 74c38cf7fd24c60e4f0a90585d17250478260877) -Signed-off-by: Peter Xu ---- - migration/options.c | 8 ++---- - migration/postcopy-ram.c | 60 +++++++++++++++++++++------------------- - migration/postcopy-ram.h | 3 +- - migration/savevm.c | 3 +- - 4 files changed, 39 insertions(+), 35 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 4701c75a4d..e51d667e14 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -302,6 +302,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -+ ERRP_GUARD(); - #ifndef CONFIG_LIVE_BLOCK_MIGRATION - if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { - error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -@@ -327,11 +328,8 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - */ - if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && - runstate_check(RUN_STATE_INMIGRATE) && -- !postcopy_ram_supported_by_host(mis)) { -- /* postcopy_ram_supported_by_host will have emitted a more -- * detailed message -- */ -- error_setg(errp, "Postcopy is not supported"); -+ !postcopy_ram_supported_by_host(mis, errp)) { -+ error_prepend(errp, "Postcopy is not supported: "); - return false; - } - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 0711500036..75aa276bb1 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -283,11 +283,13 @@ static bool request_ufd_features(int ufd, uint64_t features) - return true; - } - --static bool ufd_check_and_apply(int ufd, 
MigrationIncomingState *mis) -+static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis, -+ Error **errp) - { - uint64_t asked_features = 0; - static uint64_t supported_features; - -+ ERRP_GUARD(); - /* - * it's not possible to - * request UFFD_API twice per one fd -@@ -295,7 +297,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - */ - if (!supported_features) { - if (!receive_ufd_features(&supported_features)) { -- error_report("%s failed", __func__); -+ error_setg(errp, "Userfault feature detection failed"); - return false; - } - } -@@ -317,8 +319,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - * userfault file descriptor - */ - if (!request_ufd_features(ufd, asked_features)) { -- error_report("%s failed: features %" PRIu64, __func__, -- asked_features); -+ error_setg(errp, "Failed features %" PRIu64, asked_features); - return false; - } - -@@ -329,7 +330,8 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; - #endif - if (!have_hp) { -- error_report("Userfault on this host does not support huge pages"); -+ error_setg(errp, -+ "Userfault on this host does not support huge pages"); - return false; - } - } -@@ -338,7 +340,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - - /* Callback from postcopy_ram_supported_by_host block iterator. 
- */ --static int test_ramblock_postcopiable(RAMBlock *rb) -+static int test_ramblock_postcopiable(RAMBlock *rb, Error **errp) - { - const char *block_name = qemu_ram_get_idstr(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); -@@ -346,16 +348,18 @@ static int test_ramblock_postcopiable(RAMBlock *rb) - QemuFsType fs; - - if (length % pagesize) { -- error_report("Postcopy requires RAM blocks to be a page size multiple," -- " block %s is 0x" RAM_ADDR_FMT " bytes with a " -- "page size of 0x%zx", block_name, length, pagesize); -+ error_setg(errp, -+ "Postcopy requires RAM blocks to be a page size multiple," -+ " block %s is 0x" RAM_ADDR_FMT " bytes with a " -+ "page size of 0x%zx", block_name, length, pagesize); - return 1; - } - - if (rb->fd >= 0) { - fs = qemu_fd_getfs(rb->fd); - if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { -- error_report("Host backend files need to be TMPFS or HUGETLBFS only"); -+ error_setg(errp, -+ "Host backend files need to be TMPFS or HUGETLBFS only"); - return 1; - } - } -@@ -368,7 +372,7 @@ static int test_ramblock_postcopiable(RAMBlock *rb) - * normally fine since if the postcopy succeeds it gets turned back on at the - * end. 
- */ --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) - { - long pagesize = qemu_real_host_page_size(); - int ufd = -1; -@@ -377,29 +381,27 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - struct uffdio_register reg_struct; - struct uffdio_range range_struct; - uint64_t feature_mask; -- Error *local_err = NULL; - RAMBlock *block; - -+ ERRP_GUARD(); - if (qemu_target_page_size() > pagesize) { -- error_report("Target page size bigger than host page size"); -+ error_setg(errp, "Target page size bigger than host page size"); - goto out; - } - - ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { -- error_report("%s: userfaultfd not available: %s", __func__, -- strerror(errno)); -+ error_setg(errp, "Userfaultfd not available: %s", strerror(errno)); - goto out; - } - - /* Give devices a chance to object */ -- if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) { -- error_report_err(local_err); -+ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, errp)) { - goto out; - } - - /* Version and features check */ -- if (!ufd_check_and_apply(ufd, mis)) { -+ if (!ufd_check_and_apply(ufd, mis, errp)) { - goto out; - } - -@@ -417,7 +419,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - * affect in reality, or we can revisit. - */ - RAMBLOCK_FOREACH(block) { -- if (test_ramblock_postcopiable(block)) { -+ if (test_ramblock_postcopiable(block, errp)) { - goto out; - } - } -@@ -427,7 +429,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - * it was enabled. 
- */ - if (munlockall()) { -- error_report("%s: munlockall: %s", __func__, strerror(errno)); -+ error_setg(errp, "munlockall() failed: %s", strerror(errno)); - goto out; - } - -@@ -439,8 +441,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (testarea == MAP_FAILED) { -- error_report("%s: Failed to map test area: %s", __func__, -- strerror(errno)); -+ error_setg(errp, "Failed to map test area: %s", strerror(errno)); - goto out; - } - g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize)); -@@ -450,14 +451,14 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; - - if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) { -- error_report("%s userfault register: %s", __func__, strerror(errno)); -+ error_setg(errp, "UFFDIO_REGISTER failed: %s", strerror(errno)); - goto out; - } - - range_struct.start = (uintptr_t)testarea; - range_struct.len = pagesize; - if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { -- error_report("%s userfault unregister: %s", __func__, strerror(errno)); -+ error_setg(errp, "UFFDIO_UNREGISTER failed: %s", strerror(errno)); - goto out; - } - -@@ -465,8 +466,8 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - (__u64)1 << _UFFDIO_COPY | - (__u64)1 << _UFFDIO_ZEROPAGE; - if ((reg_struct.ioctls & feature_mask) != feature_mask) { -- error_report("Missing userfault map features: %" PRIx64, -- (uint64_t)(~reg_struct.ioctls & feature_mask)); -+ error_setg(errp, "Missing userfault map features: %" PRIx64, -+ (uint64_t)(~reg_struct.ioctls & feature_mask)); - goto out; - } - -@@ -1188,6 +1189,8 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) - - int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - { -+ Error *local_err = NULL; -+ - /* Open the fd for the kernel to give us userfaults */ - mis->userfault_fd = uffd_open(O_CLOEXEC | 
O_NONBLOCK); - if (mis->userfault_fd == -1) { -@@ -1200,7 +1203,8 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - * Although the host check already tested the API, we need to - * do the check again as an ABI handshake on the new fd. - */ -- if (!ufd_check_and_apply(mis->userfault_fd, mis)) { -+ if (!ufd_check_and_apply(mis->userfault_fd, mis, &local_err)) { -+ error_report_err(local_err); - return -1; - } - -@@ -1360,7 +1364,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) - { - } - --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) - { - error_report("%s: No OS support", __func__); - return false; -diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h -index b4867a32d5..442ab89752 100644 ---- a/migration/postcopy-ram.h -+++ b/migration/postcopy-ram.h -@@ -14,7 +14,8 @@ - #define QEMU_POSTCOPY_RAM_H - - /* Return true if the host supports everything we need to do postcopy-ram */ --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, -+ Error **errp); - - /* - * Make all of RAM sensitive to accesses to areas that haven't yet been written -diff --git a/migration/savevm.c b/migration/savevm.c -index 9671211339..211eff3a8b 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1753,7 +1753,8 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, - return -EINVAL; - } - -- if (!postcopy_ram_supported_by_host(mis)) { -+ if (!postcopy_ram_supported_by_host(mis, &local_err)) { -+ error_report_err(local_err); - postcopy_state_set(POSTCOPY_INCOMING_NONE); - return -1; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch b/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch deleted file mode 100644 index d1620f0..0000000 --- 
a/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 3691bb5f956e3c60dbf6de183011b31dbc7a7801 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 May 2023 15:52:12 -0500 -Subject: [PATCH 01/56] migration: Attempt disk reactivation in more failure - scenarios - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Acked-by: Kevin Wolf -RH-Commit: [1/1] 5999b747b314641259d3b8809033b057805eed3f (ebblake/centos-qemu-kvm) - -Commit fe904ea824 added a fail_inactivate label, which tries to -reactivate disks on the source after a failure while s->state == -MIGRATION_STATUS_ACTIVE, but didn't actually use the label if -qemu_savevm_state_complete_precopy() failed. This failure to -reactivate is also present in commit 6039dd5b1c (also covering the new -s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring -s->block_inactive is set more reliably). - -Consolidate the two labels back into one - no matter HOW migration is -failed, if there is any chance we can reach vm_start() after having -attempted inactivation, it is essential that we have tried to restart -disks before then. This also makes the cleanup more like -migrate_fd_cancel(). 
- -Suggested-by: Kevin Wolf -Signed-off-by: Eric Blake -Message-Id: <20230502205212.134680-1-eblake@redhat.com> -Acked-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 6dab4c93ecfae48e2e67b984d1032c1e988d3005) -[eblake: downstream migrate_colo() => migrate_colo_enabled()] -Signed-off-by: Eric Blake ---- - migration/migration.c | 24 ++++++++++++++---------- - 1 file changed, 14 insertions(+), 10 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 08007cef4e..99f86bd6c2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3443,6 +3443,11 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -+ /* -+ * Inactivate disks except in COLO, and track that we -+ * have done so in order to remember to reactivate -+ * them if migration fails or is cancelled. -+ */ - s->block_inactive = !migrate_colo_enabled(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, -@@ -3487,13 +3492,13 @@ static void migration_completion(MigrationState *s) - rp_error = await_return_path_close_on_source(s); - trace_migration_return_path_end_after(rp_error); - if (rp_error) { -- goto fail_invalidate; -+ goto fail; - } - } - - if (qemu_file_get_error(s->to_dst_file)) { - trace_migration_completion_file_err(); -- goto fail_invalidate; -+ goto fail; - } - - if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { -@@ -3507,26 +3512,25 @@ static void migration_completion(MigrationState *s) - - return; - --fail_invalidate: -- /* If not doing postcopy, vm_start() will be called: let's regain -- * control on images. 
-- */ -- if (s->state == MIGRATION_STATUS_ACTIVE || -- s->state == MIGRATION_STATUS_DEVICE) { -+fail: -+ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || -+ s->state == MIGRATION_STATUS_DEVICE)) { -+ /* -+ * If not doing postcopy, vm_start() will be called: let's -+ * regain control on images. -+ */ - Error *local_err = NULL; - - qemu_mutex_lock_iothread(); - bdrv_activate_all(&local_err); - if (local_err) { - error_report_err(local_err); -- s->block_inactive = true; - } else { - s->block_inactive = false; - } - qemu_mutex_unlock_iothread(); - } - --fail: - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_FAILED); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cap_set.patch b/SOURCES/kvm-migration-Create-migrate_cap_set.patch deleted file mode 100644 index 33268bb..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cap_set.patch +++ /dev/null @@ -1,93 +0,0 @@ -From d772464e9a51a085e10864b2dc7ffd49991fc23b Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 21:02:42 +0100 -Subject: [PATCH 22/56] migration: Create migrate_cap_set() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [21/50] 5b12f04013cf2d374a869134bb67c938c789e24d (peterx/qemu-kvm) - -And remove the convoluted use of qmp_migrate_set_capabilities() to -enable disable MIGRATION_CAPABILITY_BLOCK. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9eb1109cfba5415dd0b0cb82e80fc5e42fe861b7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 34 ++++++++++++++++------------------ - 1 file changed, 16 insertions(+), 18 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index b745d829a4..18058fb597 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1912,25 +1912,24 @@ void migrate_set_state(int *state, int old_state, int new_state) - } - } - --static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, -- bool state) -+static bool migrate_cap_set(int cap, bool value, Error **errp) - { -- MigrationCapabilityStatus *cap; -- -- cap = g_new0(MigrationCapabilityStatus, 1); -- cap->capability = index; -- cap->state = state; -+ MigrationState *s = migrate_get_current(); -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; - -- return cap; --} -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return false; -+ } - --void migrate_set_block_enabled(bool value, Error **errp) --{ -- MigrationCapabilityStatusList *cap = NULL; -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ new_caps[cap] = value; - -- QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); -- qmp_migrate_set_capabilities(cap, errp); -- qapi_free_MigrationCapabilityStatusList(cap); -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return false; -+ } -+ s->capabilities[cap] = value; -+ return true; - } - - static void migrate_set_block_incremental(MigrationState *s, bool value) -@@ -1942,7 +1941,7 @@ static void block_cleanup_parameters(MigrationState *s) - { - if (s->must_remove_block_options) { - /* setting to false can never fail */ -- migrate_set_block_enabled(false, &error_abort); -+ migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort); - migrate_set_block_incremental(s, false); - s->must_remove_block_options = false; - 
} -@@ -2429,8 +2428,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - "current migration capabilities"); - return false; - } -- migrate_set_block_enabled(true, &local_err); -- if (local_err) { -+ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) { - error_propagate(errp, local_err); - return false; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch b/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch deleted file mode 100644 index 408d258..0000000 --- a/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch +++ /dev/null @@ -1,84 +0,0 @@ -From a17bee3c8ab48daa471ec53bed0e2cb0bb41fc76 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:04:55 +0100 -Subject: [PATCH 41/56] migration: Create migrate_checkpoint_delay() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [40/50] b972d3f12e49dc27aa78eb723ca6d0fac4d174d8 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit f94a858fa3e72ba954a338c01ae9fecc15fcce5c) -Signed-off-by: Peter Xu ---- - migration/colo.c | 5 ++--- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - 3 files changed, 12 insertions(+), 3 deletions(-) - -diff --git a/migration/colo.c b/migration/colo.c -index 93b78c9270..07bfa21fea 100644 ---- a/migration/colo.c -+++ b/migration/colo.c -@@ -576,7 +576,7 @@ static void colo_process_checkpoint(MigrationState *s) - trace_colo_vm_state_change("stop", "run"); - - timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + -- s->parameters.x_checkpoint_delay); -+ migrate_checkpoint_delay()); - - while (s->state == MIGRATION_STATUS_COLO) { - if 
(failover_get_state() != FAILOVER_STATUS_NONE) { -@@ -651,8 +651,7 @@ void colo_checkpoint_notify(void *opaque) - - qemu_event_set(&s->colo_checkpoint_event); - s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); -- next_notify_time = s->colo_checkpoint_time + -- s->parameters.x_checkpoint_delay; -+ next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); - timer_mod(s->colo_delay_timer, next_notify_time); - } - -diff --git a/migration/options.c b/migration/options.c -index b9f3815f7e..0e102e5700 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -472,6 +472,15 @@ bool migrate_block_incremental(void) - return s->parameters.block_incremental; - } - -+uint32_t migrate_checkpoint_delay(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.x_checkpoint_delay; -+} -+ - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index aa54443353..adc2879bbb 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -46,6 +46,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); - /* parameters */ - - bool migrate_block_incremental(void); -+uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch deleted file mode 100644 index 65bad3c..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 7ff430e011780dad00e5ebaad0318c5fa3aec102 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:20:49 +0100 -Subject: [PATCH 45/56] migration: Create migrate_cpu_throttle_increment() - function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [44/50] aec990a106a0347b265f5c056a516e0b91e8183c (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9605c2ac282c565bb00b5f344217161bef29eff8) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index f7fb6999f7..31435d2b45 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) - return s->parameters.compress_wait_thread; - } - -+uint8_t migrate_cpu_throttle_increment(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_increment; -+} -+ - uint8_t migrate_cpu_throttle_initial(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index fd8b91d767..49b29bdafd 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -+uint8_t migrate_cpu_throttle_increment(void); - uint8_t migrate_cpu_throttle_initial(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5e855d5c22..5645745a42 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -713,7 +713,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - { - MigrationState *s = migrate_get_current(); - uint64_t pct_initial = migrate_cpu_throttle_initial(); -- uint64_t pct_increment = s->parameters.cpu_throttle_increment; -+ uint64_t 
pct_increment = migrate_cpu_throttle_increment(); - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; - int pct_max = migrate_max_cpu_throttle(); - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch deleted file mode 100644 index aab2013..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch +++ /dev/null @@ -1,75 +0,0 @@ -From fdc2f14bfb3ef8897310a7db63287a9bab1fb858 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:22:44 +0100 -Subject: [PATCH 44/56] migration: Create migrate_cpu_throttle_initial() to - option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [43/50] e0e0db7218f28aefd4bd022edbaec236e2030cb1 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 2a8ec38082f8098f2693bb3632175453c0c84a51) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index 418aafac64..f7fb6999f7 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) - return s->parameters.compress_wait_thread; - } - -+uint8_t migrate_cpu_throttle_initial(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_initial; -+} -+ - int migrate_decompress_threads(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 72b1a320b7..fd8b91d767 100644 ---- a/migration/options.h 
-+++ b/migration/options.h -@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -+uint8_t migrate_cpu_throttle_initial(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5c786513ef..5e855d5c22 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -712,7 +712,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t bytes_dirty_threshold) - { - MigrationState *s = migrate_get_current(); -- uint64_t pct_initial = s->parameters.cpu_throttle_initial; -+ uint64_t pct_initial = migrate_cpu_throttle_initial(); - uint64_t pct_increment = s->parameters.cpu_throttle_increment; - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; - int pct_max = migrate_max_cpu_throttle(); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch deleted file mode 100644 index e36f003..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch +++ /dev/null @@ -1,78 +0,0 @@ -From b88c51c4b02639e28da73143b1da7bd3d6706ce5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:29:51 +0100 -Subject: [PATCH 46/56] migration: Create migrate_cpu_throttle_tailslow() - function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [45/50] e93e96392405c60f75abbf288e4fddb191bbc996 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 
873f674c559e3162a6e6e92994301d400c5cc873) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 3 +-- - 3 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 31435d2b45..615534c151 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -527,6 +527,15 @@ uint8_t migrate_cpu_throttle_initial(void) - return s->parameters.cpu_throttle_initial; - } - -+bool migrate_cpu_throttle_tailslow(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_tailslow; -+} -+ - int migrate_decompress_threads(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 49b29bdafd..99f6bbd7a1 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -52,6 +52,7 @@ int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - uint8_t migrate_cpu_throttle_increment(void); - uint8_t migrate_cpu_throttle_initial(void); -+bool migrate_cpu_throttle_tailslow(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5645745a42..01356f60a4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -711,10 +711,9 @@ static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f, - static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t bytes_dirty_threshold) - { -- MigrationState *s = migrate_get_current(); - uint64_t pct_initial = migrate_cpu_throttle_initial(); - uint64_t pct_increment = migrate_cpu_throttle_increment(); -- bool pct_tailslow = s->parameters.cpu_throttle_tailslow; -+ bool pct_tailslow = migrate_cpu_throttle_tailslow(); - int pct_max = migrate_max_cpu_throttle(); - - uint64_t throttle_now = cpu_throttle_get_percentage(); --- -2.39.1 - diff --git 
a/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch b/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch deleted file mode 100644 index ba1d34c..0000000 --- a/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch +++ /dev/null @@ -1,232 +0,0 @@ -From b6228b3122f5c1f220f92042277ab1bfbb5ba086 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 11:00:12 +0100 -Subject: [PATCH 48/56] migration: Create migrate_max_bandwidth() function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [47/50] 3874656f70cb9c2a30f4d63e146539480d422326 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9c894df3a37d675652390f7dbbe2f65b7bad7efa) -Signed-off-by: Peter Xu ---- - migration/migration.c | 70 +------------------------------------- - migration/options.c | 79 +++++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 1 + - 3 files changed, 81 insertions(+), 69 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 46a5ea4d42..c2e109329d 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -886,74 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) - migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); - } - --MigrationParameters *qmp_query_migrate_parameters(Error **errp) --{ -- MigrationParameters *params; -- MigrationState *s = migrate_get_current(); -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- params = g_malloc0(sizeof(*params)); -- params->has_compress_level = true; -- params->compress_level = s->parameters.compress_level; -- params->has_compress_threads = true; -- 
params->compress_threads = s->parameters.compress_threads; -- params->has_compress_wait_thread = true; -- params->compress_wait_thread = s->parameters.compress_wait_thread; -- params->has_decompress_threads = true; -- params->decompress_threads = s->parameters.decompress_threads; -- params->has_throttle_trigger_threshold = true; -- params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; -- params->has_cpu_throttle_initial = true; -- params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; -- params->has_cpu_throttle_increment = true; -- params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; -- params->has_cpu_throttle_tailslow = true; -- params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; -- params->tls_creds = g_strdup(s->parameters.tls_creds); -- params->tls_hostname = g_strdup(s->parameters.tls_hostname); -- params->tls_authz = g_strdup(s->parameters.tls_authz ? -- s->parameters.tls_authz : ""); -- params->has_max_bandwidth = true; -- params->max_bandwidth = s->parameters.max_bandwidth; -- params->has_downtime_limit = true; -- params->downtime_limit = s->parameters.downtime_limit; -- params->has_x_checkpoint_delay = true; -- params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; -- params->has_block_incremental = true; -- params->block_incremental = s->parameters.block_incremental; -- params->has_multifd_channels = true; -- params->multifd_channels = s->parameters.multifd_channels; -- params->has_multifd_compression = true; -- params->multifd_compression = s->parameters.multifd_compression; -- params->has_multifd_zlib_level = true; -- params->multifd_zlib_level = s->parameters.multifd_zlib_level; -- params->has_multifd_zstd_level = true; -- params->multifd_zstd_level = s->parameters.multifd_zstd_level; -- params->has_xbzrle_cache_size = true; -- params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; -- params->has_max_postcopy_bandwidth = true; -- params->max_postcopy_bandwidth = 
s->parameters.max_postcopy_bandwidth; -- params->has_max_cpu_throttle = true; -- params->max_cpu_throttle = s->parameters.max_cpu_throttle; -- params->has_announce_initial = true; -- params->announce_initial = s->parameters.announce_initial; -- params->has_announce_max = true; -- params->announce_max = s->parameters.announce_max; -- params->has_announce_rounds = true; -- params->announce_rounds = s->parameters.announce_rounds; -- params->has_announce_step = true; -- params->announce_step = s->parameters.announce_step; -- -- if (s->parameters.has_block_bitmap_mapping) { -- params->has_block_bitmap_mapping = true; -- params->block_bitmap_mapping = -- QAPI_CLONE(BitmapMigrationNodeAliasList, -- s->parameters.block_bitmap_mapping); -- } -- -- return params; --} -- - /* - * Return true if we're already in the middle of a migration - * (i.e. any of the active or setup states) -@@ -3775,7 +3707,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - XFER_LIMIT_RATIO; - } else { - /* This is a fresh new migration */ -- rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; -+ rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO; - - /* Notify before starting migration thread */ - notifier_list_notify(&migration_state_notifiers, s); -diff --git a/migration/options.c b/migration/options.c -index 8bd2d949ae..8e8753d9be 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,8 +12,10 @@ - */ - - #include "qemu/osdep.h" -+#include "qapi/clone-visitor.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" -+#include "qapi/qapi-visit-migration.h" - #include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" - #include "migration/misc.h" -@@ -562,6 +564,15 @@ uint8_t migrate_max_cpu_throttle(void) - return s->parameters.max_cpu_throttle; - } - -+uint64_t migrate_max_bandwidth(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_bandwidth; -+} -+ - int64_t 
migrate_max_postcopy_bandwidth(void) - { - MigrationState *s; -@@ -641,3 +652,71 @@ AnnounceParameters *migrate_announce_params(void) - - return ≈ - } -+ -+MigrationParameters *qmp_query_migrate_parameters(Error **errp) -+{ -+ MigrationParameters *params; -+ MigrationState *s = migrate_get_current(); -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ params = g_malloc0(sizeof(*params)); -+ params->has_compress_level = true; -+ params->compress_level = s->parameters.compress_level; -+ params->has_compress_threads = true; -+ params->compress_threads = s->parameters.compress_threads; -+ params->has_compress_wait_thread = true; -+ params->compress_wait_thread = s->parameters.compress_wait_thread; -+ params->has_decompress_threads = true; -+ params->decompress_threads = s->parameters.decompress_threads; -+ params->has_throttle_trigger_threshold = true; -+ params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; -+ params->has_cpu_throttle_initial = true; -+ params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; -+ params->has_cpu_throttle_increment = true; -+ params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; -+ params->has_cpu_throttle_tailslow = true; -+ params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; -+ params->tls_creds = g_strdup(s->parameters.tls_creds); -+ params->tls_hostname = g_strdup(s->parameters.tls_hostname); -+ params->tls_authz = g_strdup(s->parameters.tls_authz ? 
-+ s->parameters.tls_authz : ""); -+ params->has_max_bandwidth = true; -+ params->max_bandwidth = s->parameters.max_bandwidth; -+ params->has_downtime_limit = true; -+ params->downtime_limit = s->parameters.downtime_limit; -+ params->has_x_checkpoint_delay = true; -+ params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; -+ params->has_block_incremental = true; -+ params->block_incremental = s->parameters.block_incremental; -+ params->has_multifd_channels = true; -+ params->multifd_channels = s->parameters.multifd_channels; -+ params->has_multifd_compression = true; -+ params->multifd_compression = s->parameters.multifd_compression; -+ params->has_multifd_zlib_level = true; -+ params->multifd_zlib_level = s->parameters.multifd_zlib_level; -+ params->has_multifd_zstd_level = true; -+ params->multifd_zstd_level = s->parameters.multifd_zstd_level; -+ params->has_xbzrle_cache_size = true; -+ params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; -+ params->has_max_postcopy_bandwidth = true; -+ params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; -+ params->has_max_cpu_throttle = true; -+ params->max_cpu_throttle = s->parameters.max_cpu_throttle; -+ params->has_announce_initial = true; -+ params->announce_initial = s->parameters.announce_initial; -+ params->has_announce_max = true; -+ params->announce_max = s->parameters.announce_max; -+ params->has_announce_rounds = true; -+ params->announce_rounds = s->parameters.announce_rounds; -+ params->has_announce_step = true; -+ params->announce_step = s->parameters.announce_step; -+ -+ if (s->parameters.has_block_bitmap_mapping) { -+ params->has_block_bitmap_mapping = true; -+ params->block_bitmap_mapping = -+ QAPI_CLONE(BitmapMigrationNodeAliasList, -+ s->parameters.block_bitmap_mapping); -+ } -+ -+ return params; -+} -diff --git a/migration/options.h b/migration/options.h -index 093bc907a1..1b78fa9f3d 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -64,6 +64,7 @@ uint8_t 
migrate_cpu_throttle_initial(void); - bool migrate_cpu_throttle_tailslow(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); -+uint64_t migrate_max_bandwidth(void); - int64_t migrate_max_postcopy_bandwidth(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch b/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch deleted file mode 100644 index 6628b80..0000000 --- a/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch +++ /dev/null @@ -1,88 +0,0 @@ -From f0d4e34b00f66d2336b755a34a1ba226571641c4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:13:01 +0100 -Subject: [PATCH 42/56] migration: Create migrate_max_cpu_throttle() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [41/50] fc7537c06d8e1f53d7bb552661f6ddb0133a978d (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 24155bd0520035d5148c0af5b925932c4d8064a8) -Signed-off-by: Peter Xu ---- - migration/migration.h | 2 -- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 4 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.h b/migration/migration.h -index 86051af132..3ae938b19c 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -451,8 +451,6 @@ bool migrate_postcopy(void); - - int migrate_use_tls(void); - --int migrate_max_cpu_throttle(void); -- - uint64_t ram_get_total_transferred_pages(void); - - /* Sending on the return path - generic and then for each message type */ -diff --git 
a/migration/options.c b/migration/options.c -index 0e102e5700..2cb04fbbd1 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -517,6 +517,15 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - -+uint8_t migrate_max_cpu_throttle(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_cpu_throttle; -+} -+ - int64_t migrate_max_postcopy_bandwidth(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index adc2879bbb..72b1a320b7 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -51,6 +51,7 @@ int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); -+uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); -diff --git a/migration/ram.c b/migration/ram.c -index e82cee97c3..5c786513ef 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -715,7 +715,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t pct_initial = s->parameters.cpu_throttle_initial; - uint64_t pct_increment = s->parameters.cpu_throttle_increment; - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; -- int pct_max = s->parameters.max_cpu_throttle; -+ int pct_max = migrate_max_cpu_throttle(); - - uint64_t throttle_now = cpu_throttle_get_percentage(); - uint64_t cpu_now, cpu_ideal, throttle_inc; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch b/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch deleted file mode 100644 index c7799f1..0000000 --- a/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch +++ /dev/null @@ -1,95 +0,0 @@ -From e4ef0f2cee6cdf2cf4bd225ac9e610f41d66dfcb Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 
22:41:55 +0100 -Subject: [PATCH 32/56] migration: Create migrate_rdma_pin_all() function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [31/50] 206d96d47d9ee73ddc89dd01186560bf62ea5295 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy - ---- - -Fixed missing space after comma (fabiano) - -(cherry picked from commit 17cba690cdd42108369fafe6b07bff09872fbea6) -Signed-off-by: Peter Xu ---- - migration/options.c | 7 +++++++ - migration/options.h | 1 + - migration/rdma.c | 6 +++--- - 3 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 2003e413da..9c9b8e5863 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -138,6 +138,13 @@ bool migrate_postcopy_ram(void) - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } - -+bool migrate_rdma_pin_all(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]; -+} -+ - bool migrate_release_ram(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 316efd1063..25c002b37a 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -30,6 +30,7 @@ bool migrate_pause_before_switchover(void); - bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); -+bool migrate_rdma_pin_all(void); - bool migrate_release_ram(void); - bool migrate_return_path(void); - bool migrate_validate_uuid(void); -diff --git a/migration/rdma.c b/migration/rdma.c -index bf55e2f163..0af5e944f0 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -35,6 +35,7 @@ - #include - #include "trace.h" - #include 
"qom/object.h" -+#include "options.h" - #include - - /* -@@ -4178,8 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, - goto err; - } - -- ret = qemu_rdma_source_init(rdma, -- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ ret = qemu_rdma_source_init(rdma, migrate_rdma_pin_all(), errp); - - if (ret) { - goto err; -@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma_return_path, -- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ migrate_rdma_pin_all(), errp); - - if (ret) { - goto return_path_err; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch b/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch deleted file mode 100644 index 5fc1072..0000000 --- a/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 27862b9d31da6447b60f185cdad95764018c6bc6 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:59:13 +0100 -Subject: [PATCH 40/56] migration: Create migrate_throttle_trigger_threshold() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [39/50] b8af9080c49be3d38bd2784d61289be89c03db3e (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 6499efdb16e5c1288b4c8390d3bf68b313329b8b) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 3 +-- - 3 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 2b6d88b4b9..b9f3815f7e 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -554,6 +554,15 @@ 
int migrate_multifd_zstd_level(void) - return s->parameters.multifd_zstd_level; - } - -+uint8_t migrate_throttle_trigger_threshold(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.throttle_trigger_threshold; -+} -+ - uint64_t migrate_xbzrle_cache_size(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 96d5a8e6e4..aa54443353 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -55,6 +55,7 @@ int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); -+uint8_t migrate_throttle_trigger_threshold(void); - uint64_t migrate_xbzrle_cache_size(void); - - #endif -diff --git a/migration/ram.c b/migration/ram.c -index 4576d0d849..e82cee97c3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1178,8 +1178,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - - static void migration_trigger_throttle(RAMState *rs) - { -- MigrationState *s = migrate_get_current(); -- uint64_t threshold = s->parameters.throttle_trigger_threshold; -+ uint64_t threshold = migrate_throttle_trigger_threshold(); - uint64_t bytes_xfer_period = - stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; - uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-options.c.patch b/SOURCES/kvm-migration-Create-options.c.patch deleted file mode 100644 index ea60202..0000000 --- a/SOURCES/kvm-migration-Create-options.c.patch +++ /dev/null @@ -1,524 +0,0 @@ -From 282634a835f4711c8b501dd76c344058bc399fbd Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 21:18:45 +0100 -Subject: [PATCH 23/56] migration: Create options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures 
for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [22/50] 10c9be528b9fcfae93f1a12fcd09db1a69e58f64 (peterx/qemu-kvm) - -We move there all capabilities helpers from migration.c. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert - ---- - -Following David advise: -- looked through the history, capabilities are newer than 2012, so we - can remove that bit of the header. -- This part is posterior to Anthony. - Original Author is Orit. Once there, - I put myself. Peter Xu also did quite a bit of work here. - Anyone else wants/needs to be there? I didn't search too hard - because nobody asked before to be added. - -What do you think? - -(cherry picked from commit 1f0776f1c03312aad5d6a5f98871240bc3af01e5) -Signed-off-by: Peter Xu ---- - hw/virtio/virtio-balloon.c | 1 + - migration/block-dirty-bitmap.c | 1 + - migration/block.c | 1 + - migration/colo.c | 1 + - migration/meson.build | 1 + - migration/migration.c | 109 +---------------------------- - migration/migration.h | 12 ---- - migration/options.c | 124 +++++++++++++++++++++++++++++++++ - migration/options.h | 32 +++++++++ - migration/postcopy-ram.c | 1 + - migration/ram.c | 1 + - migration/savevm.c | 1 + - migration/socket.c | 1 + - 13 files changed, 166 insertions(+), 120 deletions(-) - create mode 100644 migration/options.c - create mode 100644 migration/options.h - -diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c -index 746f07c4d2..43092aa634 100644 ---- a/hw/virtio/virtio-balloon.c -+++ b/hw/virtio/virtio-balloon.c -@@ -32,6 +32,7 @@ - #include "qemu/error-report.h" - #include "migration/misc.h" - #include "migration/migration.h" -+#include "migration/options.h" - - #include "hw/virtio/virtio-bus.h" - #include "hw/virtio/virtio-access.h" -diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c -index fe73aa94b1..a6ffae0002 100644 ---- 
a/migration/block-dirty-bitmap.c -+++ b/migration/block-dirty-bitmap.c -@@ -79,6 +79,7 @@ - #include "qapi/qapi-visit-migration.h" - #include "qapi/clone-visitor.h" - #include "trace.h" -+#include "options.h" - - #define CHUNK_SIZE (1 << 10) - -diff --git a/migration/block.c b/migration/block.c -index b2497bbd32..4b167fa5cf 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -28,6 +28,7 @@ - #include "migration/vmstate.h" - #include "sysemu/block-backend.h" - #include "trace.h" -+#include "options.h" - - #define BLK_MIG_BLOCK_SIZE (1ULL << 20) - #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS) -diff --git a/migration/colo.c b/migration/colo.c -index 0716e64689..93b78c9270 100644 ---- a/migration/colo.c -+++ b/migration/colo.c -@@ -36,6 +36,7 @@ - #include "sysemu/cpus.h" - #include "sysemu/runstate.h" - #include "net/filter.h" -+#include "options.h" - - static bool vmstate_loading; - static Notifier packets_compare_notifier; -diff --git a/migration/meson.build b/migration/meson.build -index 0d1bb9f96e..480ff6854a 100644 ---- a/migration/meson.build -+++ b/migration/meson.build -@@ -22,6 +22,7 @@ softmmu_ss.add(files( - 'migration.c', - 'multifd.c', - 'multifd-zlib.c', -+ 'options.c', - 'postcopy-ram.c', - 'savevm.c', - 'socket.c', -diff --git a/migration/migration.c b/migration/migration.c -index 18058fb597..66ea55be06 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -63,6 +63,7 @@ - #include "sysemu/cpus.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" -+#include "options.h" - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - -@@ -357,15 +358,6 @@ static void migrate_generate_event(int new_state) - } - } - --static bool migrate_late_block_activate(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; --} -- - /* - * Send a message on the return channel back to the source - * of the 
migration. -@@ -2525,56 +2517,11 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --bool migrate_release_ram(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; --} -- --bool migrate_postcopy_ram(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; --} -- - bool migrate_postcopy(void) - { - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --bool migrate_auto_converge(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; --} -- --bool migrate_zero_blocks(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; --} -- --bool migrate_postcopy_blocktime(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; --} -- - bool migrate_use_compression(void) - { - MigrationState *s; -@@ -2620,33 +2567,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_dirty_bitmaps(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; --} -- --bool migrate_ignore_shared(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; --} -- --bool migrate_validate_uuid(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; --} -- - bool migrate_use_events(void) - { - MigrationState *s; -@@ -2665,15 +2585,6 @@ bool migrate_use_multifd(void) - return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - --bool migrate_pause_before_switchover(void) --{ -- MigrationState *s; -- -- s 
= migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; --} -- - int migrate_multifd_channels(void) - { - MigrationState *s; -@@ -2785,24 +2696,6 @@ bool migrate_use_block_incremental(void) - return s->parameters.block_incremental; - } - --bool migrate_background_snapshot(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; --} -- --bool migrate_postcopy_preempt(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index 04e0860b4e..a25fed6ef0 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,16 +449,7 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --bool migrate_release_ram(void); --bool migrate_postcopy_ram(void); --bool migrate_zero_blocks(void); --bool migrate_dirty_bitmaps(void); --bool migrate_ignore_shared(void); --bool migrate_validate_uuid(void); -- --bool migrate_auto_converge(void); - bool migrate_use_multifd(void); --bool migrate_pause_before_switchover(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); -@@ -487,9 +478,6 @@ int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); - bool migrate_use_events(void); --bool migrate_postcopy_blocktime(void); --bool migrate_background_snapshot(void); --bool migrate_postcopy_preempt(void); - - /* Sending on the return path - generic and then for each message type */ - void migrate_send_rp_shut(MigrationIncomingState *mis, -diff --git a/migration/options.c b/migration/options.c -new file mode 100644 -index 0000000000..88a9a45913 ---- 
/dev/null -+++ b/migration/options.c -@@ -0,0 +1,124 @@ -+/* -+ * QEMU migration capabilities -+ * -+ * Copyright (c) 2012-2023 Red Hat Inc -+ * -+ * Authors: -+ * Orit Wasserman -+ * Juan Quintela -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#include "qemu/osdep.h" -+#include "migration.h" -+#include "options.h" -+ -+bool migrate_auto_converge(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; -+} -+ -+bool migrate_background_snapshot(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; -+} -+ -+bool migrate_dirty_bitmaps(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; -+} -+ -+bool migrate_ignore_shared(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; -+} -+ -+bool migrate_late_block_activate(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; -+} -+ -+bool migrate_pause_before_switchover(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; -+} -+ -+bool migrate_postcopy_blocktime(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; -+} -+ -+bool migrate_postcopy_preempt(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; -+} -+ -+bool migrate_postcopy_ram(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -+} -+ -+bool 
migrate_release_ram(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; -+} -+ -+bool migrate_validate_uuid(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; -+} -+ -+bool migrate_zero_blocks(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; -+} -diff --git a/migration/options.h b/migration/options.h -new file mode 100644 -index 0000000000..0dfa0af245 ---- /dev/null -+++ b/migration/options.h -@@ -0,0 +1,32 @@ -+/* -+ * QEMU migration capabilities -+ * -+ * Copyright (c) 2012-2023 Red Hat Inc -+ * -+ * Authors: -+ * Orit Wasserman -+ * Juan Quintela -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#ifndef QEMU_MIGRATION_OPTIONS_H -+#define QEMU_MIGRATION_OPTIONS_H -+ -+/* capabilities */ -+ -+bool migrate_auto_converge(void); -+bool migrate_background_snapshot(void); -+bool migrate_dirty_bitmaps(void); -+bool migrate_ignore_shared(void); -+bool migrate_late_block_activate(void); -+bool migrate_pause_before_switchover(void); -+bool migrate_postcopy_blocktime(void); -+bool migrate_postcopy_preempt(void); -+bool migrate_postcopy_ram(void); -+bool migrate_release_ram(void); -+bool migrate_validate_uuid(void); -+bool migrate_zero_blocks(void); -+ -+#endif -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index d7b48dd920..0711500036 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -37,6 +37,7 @@ - #include "tls.h" - #include "qemu/userfaultfd.h" - #include "qemu/mmap-alloc.h" -+#include "options.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -diff --git a/migration/ram.c b/migration/ram.c -index 229714045a..912ccd89fa 100644 ---- a/migration/ram.c -+++ 
b/migration/ram.c -@@ -57,6 +57,7 @@ - #include "qemu/iov.h" - #include "multifd.h" - #include "sysemu/runstate.h" -+#include "options.h" - - #include "hw/boards.h" /* for machine_dump_guest_core() */ - -diff --git a/migration/savevm.c b/migration/savevm.c -index 589ef926ab..ebcf571e37 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -67,6 +67,7 @@ - #include "qemu/yank.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" -+#include "options.h" - - const unsigned int postcopy_ram_discard_version; - -diff --git a/migration/socket.c b/migration/socket.c -index e6fdf3c5e1..ebf9ac41af 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -27,6 +27,7 @@ - #include "io/net-listener.h" - #include "trace.h" - #include "postcopy-ram.h" -+#include "options.h" - - struct SocketOutgoingArgs { - SocketAddress *saddr; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch b/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch deleted file mode 100644 index e08e5df..0000000 --- a/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch +++ /dev/null @@ -1,56 +0,0 @@ -From bbe565f7d3b7fe46971e020e9bd8e79dc9ffa69c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 12/37] migration: Enable switchover ack capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/28] c4a7d7d26a97181c9516d133a6610bfa5dcb1d16 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 538ef4fe2f72 -Author: Avihai Horon -Date: Wed Jun 21 14:11:56 2023 +0300 - - migration: Enable switchover ack capability - - Now that switchover ack logic has been implemented, enable the - capability. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Juan Quintela - Reviewed-by: Peter Xu - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index c3df6c6dde..ccd7ef3907 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -547,10 +547,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - "'return-path'"); - return false; - } -- -- /* Disable this capability until it's implemented */ -- error_setg(errp, "'switchover-ack' is not implemented yet"); -- return false; - } - - return true; --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch b/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch deleted file mode 100644 index 26c8437..0000000 --- a/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 2aac64623d8d2d06d248c1bcc71aa13572fc843c Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 14 Apr 2023 10:33:58 -0500 -Subject: [PATCH 1/2] migration: Handle block device inactivation failures - better - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/2] 5ae143c9234f6eee9fc5154944172bcd56975b36 (ebblake/centos-qemu-kvm) - -Consider what happens when performing a migration between two host -machines connected to an NFS server serving multiple block devices to -the guest, when the NFS server becomes unavailable. The migration -attempts to inactivate all block devices on the source (a necessary -step before the destination can take over); but if the NFS server is -non-responsive, the attempt to inactivate can itself fail. 
When that -happens, the destination fails to get the migrated guest (good, -because the source wasn't able to flush everything properly): - - (qemu) qemu-kvm: load of migration failed: Input/output error - -at which point, our only hope for the guest is for the source to take -back control. With the current code base, the host outputs a message, but then appears to resume: - - (qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1) - - (src qemu)info status - VM status: running - -but a second migration attempt now asserts: - - (src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed. - -Whether the guest is recoverable on the source after the first failure -is debatable, but what we do not want is to have qemu itself fail due -to an assertion. It looks like the problem is as follows: - -In migration.c:migration_completion(), the source sets 'inactivate' to -true (since COLO is not enabled), then tries -savevm.c:qemu_savevm_state_complete_precopy() with a request to -inactivate block devices. In turn, this calls -block.c:bdrv_inactivate_all(), which fails when flushing runs up -against the non-responsive NFS server. With savevm failing, we are -now left in a state where some, but not all, of the block devices have -been inactivated; but migration_completion() then jumps to 'fail' -rather than 'fail_invalidate' and skips an attempt to reclaim those -those disks by calling bdrv_activate_all(). Even if we do attempt to -reclaim disks, we aren't taking note of failure there, either. 
- -Thus, we have reached a state where the migration engine has forgotten -all state about whether a block device is inactive, because we did not -set s->block_inactive in enough places; so migration allows the source -to reach vm_start() and resume execution, violating the block layer -invariant that the guest CPUs should not be restarted while a device -is inactive. Note that the code in migration.c:migrate_fd_cancel() -will also try to reactivate all block devices if s->block_inactive was -set, but because we failed to set that flag after the first failure, -the source assumes it has reclaimed all devices, even though it still -has remaining inactivated devices and does not try again. Normally, -qmp_cont() will also try to reactivate all disks (or correctly fail if -the disks are not reclaimable because NFS is not yet back up), but the -auto-resumption of the source after a migration failure does not go -through qmp_cont(). And because we have left the block layer in an -inconsistent state with devices still inactivated, the later migration -attempt is hitting the assertion failure. - -Since it is important to not resume the source with inactive disks, -this patch marks s->block_inactive before attempting inactivation, -rather than after succeeding, in order to prevent any vm_start() until -it has successfully reactivated all devices. 
- -See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982 - -Signed-off-by: Eric Blake -Reviewed-by: Juan Quintela -Acked-by: Lukas Straub -Tested-by: Lukas Straub -Signed-off-by: Juan Quintela -(cherry picked from commit 403d18ae384239876764bbfa111d6cc5dcb673d1) ---- - migration/migration.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index bda4789193..cb0d42c061 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3444,13 +3444,11 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -+ s->block_inactive = inactivate; - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - inactivate); - } -- if (inactivate && ret >= 0) { -- s->block_inactive = true; -- } - } - qemu_mutex_unlock_iothread(); - -@@ -3522,6 +3520,7 @@ fail_invalidate: - bdrv_activate_all(&local_err); - if (local_err) { - error_report_err(local_err); -+ s->block_inactive = true; - } else { - s->block_inactive = false; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch b/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch deleted file mode 100644 index 49b9f12..0000000 --- a/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch +++ /dev/null @@ -1,339 +0,0 @@ -From 387c39f198d94f600be525e363edc7ca916dc261 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 11/37] migration: Implement switchover ack logic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/28] 853e1978f3b9f87942863bba894a0ed908bde6b1 (clegoate/qemu-kvm-c9s) - -Bugzilla: 
https://bugzilla.redhat.com/2192818 - -commit 1b4adb10f898 -Author: Avihai Horon -Date: Wed Jun 21 14:11:55 2023 +0300 - - migration: Implement switchover ack logic - - Implement switchover ack logic. This prevents the source from stopping - the VM and completing the migration until an ACK is received from the - destination that it's OK to do so. - - To achieve this, a new SaveVMHandlers handler switchover_ack_needed() - and a new return path message MIG_RP_MSG_SWITCHOVER_ACK are added. - - The switchover_ack_needed() handler is called during migration setup in - the destination to check if switchover ack is used by the migrated - device. - - When switchover is approved by all migrated devices in the destination - that support this capability, the MIG_RP_MSG_SWITCHOVER_ACK return path - message is sent to the source to notify it that it's OK to do - switchover. - - Signed-off-by: Avihai Horon - Reviewed-by: Peter Xu - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c - context changes due to commit f4584076fc31 ("migration: switch - from .vm_was_running to .vm_old_state") - -Signed-off-by: Cédric Le Goater ---- - include/migration/register.h | 2 ++ - migration/migration.c | 32 +++++++++++++++++++-- - migration/migration.h | 14 ++++++++++ - migration/savevm.c | 54 ++++++++++++++++++++++++++++++++++++ - migration/savevm.h | 1 + - migration/trace-events | 3 ++ - 6 files changed, 104 insertions(+), 2 deletions(-) - -diff --git a/include/migration/register.h b/include/migration/register.h -index a8dfd8fefd..90914f32f5 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -71,6 +71,8 @@ typedef struct SaveVMHandlers { - int (*load_cleanup)(void *opaque); - /* Called when postcopy migration wants to resume from failure */ - int (*resume_prepare)(MigrationState *s, void *opaque); -+ /* Checks if switchover ack should be used. 
Called only in dest */ -+ bool (*switchover_ack_needed)(void *opaque); - } SaveVMHandlers; - - int register_savevm_live(const char *idstr, -diff --git a/migration/migration.c b/migration/migration.c -index 1ac5f19bc2..9bf1caee6c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -76,6 +76,7 @@ enum mig_rp_message_type { - MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ - MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ - MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ -+ MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */ - - MIG_RP_MSG_MAX - }; -@@ -756,6 +757,11 @@ bool migration_has_all_channels(void) - return true; - } - -+int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) -+{ -+ return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL); -+} -+ - /* - * Send a 'SHUT' message on the return channel with the given value - * to indicate that we've finished with the RP. Non-0 value indicates -@@ -1415,6 +1421,7 @@ void migrate_init(MigrationState *s) - s->vm_was_running = false; - s->iteration_initial_bytes = 0; - s->threshold_size = 0; -+ s->switchover_acked = false; - } - - int migrate_add_blocker_internal(Error *reason, Error **errp) -@@ -1731,6 +1738,7 @@ static struct rp_cmd_args { - [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, - [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, - [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, -+ [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, - [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, - }; - -@@ -1969,6 +1977,11 @@ retry: - } - break; - -+ case MIG_RP_MSG_SWITCHOVER_ACK: -+ ms->switchover_acked = true; -+ trace_source_return_path_thread_switchover_acked(); -+ break; -+ - default: - break; - } -@@ -2720,6 +2733,20 @@ static void migration_update_counters(MigrationState *s, - bandwidth, s->threshold_size); - } - -+static bool 
migration_can_switchover(MigrationState *s) -+{ -+ if (!migrate_switchover_ack()) { -+ return true; -+ } -+ -+ /* No reason to wait for switchover ACK if VM is stopped */ -+ if (!runstate_is_running()) { -+ return true; -+ } -+ -+ return s->switchover_acked; -+} -+ - /* Migration thread iteration status */ - typedef enum { - MIG_ITERATE_RESUME, /* Resume current iteration */ -@@ -2735,6 +2762,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) - { - uint64_t must_precopy, can_postcopy; - bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; -+ bool can_switchover = migration_can_switchover(s); - - qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); - uint64_t pending_size = must_precopy + can_postcopy; -@@ -2747,14 +2775,14 @@ static MigIterateState migration_iteration_run(MigrationState *s) - trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy); - } - -- if (!pending_size || pending_size < s->threshold_size) { -+ if ((!pending_size || pending_size < s->threshold_size) && can_switchover) { - trace_migration_thread_low_pending(pending_size); - migration_completion(s); - return MIG_ITERATE_BREAK; - } - - /* Still a significant amount to transfer */ -- if (!in_postcopy && must_precopy <= s->threshold_size && -+ if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover && - qatomic_read(&s->start_postcopy)) { - if (postcopy_start(s)) { - error_report("%s: postcopy failed to start", __func__); -diff --git a/migration/migration.h b/migration/migration.h -index 2b71df8617..e9679f8029 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -204,6 +204,13 @@ struct MigrationIncomingState { - * contains valid information. - */ - QemuMutex page_request_mutex; -+ -+ /* -+ * Number of devices that have yet to approve switchover. When this reaches -+ * zero an ACK that it's OK to do switchover is sent to the source. No lock -+ * is needed as this field is updated serially. 
-+ */ -+ unsigned int switchover_ack_pending_num; - }; - - MigrationIncomingState *migration_incoming_get_current(void); -@@ -421,6 +428,12 @@ struct MigrationState { - - /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */ - JSONWriter *vmdesc; -+ -+ /* -+ * Indicates whether an ACK from the destination that it's OK to do -+ * switchover has been received. -+ */ -+ bool switchover_acked; - }; - - void migrate_set_state(int *state, int old_state, int new_state); -@@ -461,6 +474,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, - void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, - char *block_name); - void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); -+int migrate_send_rp_switchover_ack(MigrationIncomingState *mis); - - void dirty_bitmap_mig_before_vm_start(void); - void dirty_bitmap_mig_cancel_outgoing(void); -diff --git a/migration/savevm.c b/migration/savevm.c -index 211eff3a8b..aff70e6263 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -2358,6 +2358,21 @@ static int loadvm_process_command(QEMUFile *f) - error_report("CMD_OPEN_RETURN_PATH failed"); - return -1; - } -+ -+ /* -+ * Switchover ack is enabled but no device uses it, so send an ACK to -+ * source that it's OK to switchover. Do it here, after return path has -+ * been created. 
-+ */ -+ if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) { -+ int ret = migrate_send_rp_switchover_ack(mis); -+ if (ret) { -+ error_report( -+ "Could not send switchover ack RP MSG, err %d (%s)", ret, -+ strerror(-ret)); -+ return ret; -+ } -+ } - break; - - case MIG_CMD_PING: -@@ -2584,6 +2599,23 @@ static int qemu_loadvm_state_header(QEMUFile *f) - return 0; - } - -+static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis) -+{ -+ SaveStateEntry *se; -+ -+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { -+ if (!se->ops || !se->ops->switchover_ack_needed) { -+ continue; -+ } -+ -+ if (se->ops->switchover_ack_needed(se->opaque)) { -+ mis->switchover_ack_pending_num++; -+ } -+ } -+ -+ trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num); -+} -+ - static int qemu_loadvm_state_setup(QEMUFile *f) - { - SaveStateEntry *se; -@@ -2787,6 +2819,10 @@ int qemu_loadvm_state(QEMUFile *f) - return -EINVAL; - } - -+ if (migrate_switchover_ack()) { -+ qemu_loadvm_state_switchover_ack_needed(mis); -+ } -+ - cpu_synchronize_all_pre_loadvm(); - - ret = qemu_loadvm_state_main(f, mis); -@@ -2860,6 +2896,24 @@ int qemu_load_device_state(QEMUFile *f) - return 0; - } - -+int qemu_loadvm_approve_switchover(void) -+{ -+ MigrationIncomingState *mis = migration_incoming_get_current(); -+ -+ if (!mis->switchover_ack_pending_num) { -+ return -EINVAL; -+ } -+ -+ mis->switchover_ack_pending_num--; -+ trace_loadvm_approve_switchover(mis->switchover_ack_pending_num); -+ -+ if (mis->switchover_ack_pending_num) { -+ return 0; -+ } -+ -+ return migrate_send_rp_switchover_ack(mis); -+} -+ - bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - bool has_devices, strList *devices, Error **errp) - { -diff --git a/migration/savevm.h b/migration/savevm.h -index fb636735f0..e894bbc143 100644 ---- a/migration/savevm.h -+++ b/migration/savevm.h -@@ -65,6 +65,7 @@ int qemu_loadvm_state(QEMUFile *f); - void 
qemu_loadvm_state_cleanup(void); - int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); - int qemu_load_device_state(QEMUFile *f); -+int qemu_loadvm_approve_switchover(void); - int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, - bool in_postcopy, bool inactivate_disks); - -diff --git a/migration/trace-events b/migration/trace-events -index 92161eeac5..cda807d271 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -7,6 +7,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u" - qemu_loadvm_state_post_main(int ret) "%d" - qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" - qemu_savevm_send_packaged(void) "" -+loadvm_state_switchover_ack_needed(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" - loadvm_state_setup(void) "" - loadvm_state_cleanup(void) "" - loadvm_handle_cmd_packaged(unsigned int length) "%u" -@@ -23,6 +24,7 @@ loadvm_postcopy_ram_handle_discard_end(void) "" - loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud" - loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d" - loadvm_process_command_ping(uint32_t val) "0x%x" -+loadvm_approve_switchover(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" - postcopy_ram_listen_thread_exit(void) "" - postcopy_ram_listen_thread_start(void) "" - qemu_savevm_send_postcopy_advise(void) "" -@@ -180,6 +182,7 @@ source_return_path_thread_loop_top(void) "" - source_return_path_thread_pong(uint32_t val) "0x%x" - source_return_path_thread_shut(uint32_t val) "0x%x" - source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32 -+source_return_path_thread_switchover_acked(void) "" - migration_thread_low_pending(uint64_t pending) "%" PRIu64 - migrate_transferred(uint64_t tranferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" 
PRIu64 " max_size %" PRId64 - process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch b/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch deleted file mode 100644 index f873f3f..0000000 --- a/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch +++ /dev/null @@ -1,431 +0,0 @@ -From eaccfc91b34f93dcaf597e6b39f78741da618ff3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 08/37] migration: Make all functions check have the same - format -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/28] 774df2a81502d3eab5d5b8f64fa9b69f8be43669 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8f9c532756c5 -Author: Juan Quintela -Date: Wed Mar 1 23:11:08 2023 +0100 - - migration: Make all functions check have the same format - - Signed-off-by: Juan Quintela - Reviewed-by: Vladimir Sementsov-Ogievskiy - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 153 +++++++++++--------------------------------- - 1 file changed, 39 insertions(+), 114 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index e51d667e14..bcfe244fa9 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,27 +33,21 @@ - - bool migrate_auto_converge(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; - } - - bool migrate_background_snapshot(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return 
s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - - bool migrate_block(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; - } -@@ -61,95 +55,76 @@ bool migrate_block(void) - bool migrate_colo(void) - { - MigrationState *s = migrate_get_current(); -+ - return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - - bool migrate_compress(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; - } - - bool migrate_dirty_bitmaps(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - - bool migrate_events(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; - } - - bool migrate_ignore_shared(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; - } - - bool migrate_late_block_activate(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - - bool migrate_multifd(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - - bool migrate_pause_before_switchover(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; - } - - bool migrate_postcopy_blocktime(void) - { -- MigrationState *s; -- -- s = 
migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; - } - - bool migrate_postcopy_preempt(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; - } - - bool migrate_postcopy_ram(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } -@@ -163,54 +138,42 @@ bool migrate_rdma_pin_all(void) - - bool migrate_release_ram(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - - bool migrate_return_path(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - - bool migrate_validate_uuid(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - - bool migrate_xbzrle(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; - } - - bool migrate_zero_blocks(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } - - bool migrate_zero_copy_send(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -@@ -224,9 +187,7 @@ bool migrate_postcopy(void) - - bool migrate_tls(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ 
MigrationState *s = migrate_get_current(); - - return s->parameters.tls_creds && *s->parameters.tls_creds; - } -@@ -491,126 +452,98 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - - bool migrate_block_incremental(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.block_incremental; - } - - uint32_t migrate_checkpoint_delay(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.x_checkpoint_delay; - } - - int migrate_compress_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_level; - } - - int migrate_compress_threads(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_threads; - } - - int migrate_compress_wait_thread(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_wait_thread; - } - - uint8_t migrate_cpu_throttle_increment(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_increment; - } - - uint8_t migrate_cpu_throttle_initial(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_initial; - } - - bool migrate_cpu_throttle_tailslow(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_tailslow; - } - - int migrate_decompress_threads(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.decompress_threads; - } - - uint8_t 
migrate_max_cpu_throttle(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_cpu_throttle; - } - - uint64_t migrate_max_bandwidth(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_bandwidth; - } - - int64_t migrate_max_postcopy_bandwidth(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_postcopy_bandwidth; - } - - int migrate_multifd_channels(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_channels; - } - - MultiFDCompression migrate_multifd_compression(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); - return s->parameters.multifd_compression; -@@ -618,36 +551,28 @@ MultiFDCompression migrate_multifd_compression(void) - - int migrate_multifd_zlib_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_zlib_level; - } - - int migrate_multifd_zstd_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_zstd_level; - } - - uint8_t migrate_throttle_trigger_threshold(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.throttle_trigger_threshold; - } - - uint64_t migrate_xbzrle_cache_size(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.xbzrle_cache_size; - } --- -2.39.3 - diff --git 
a/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch deleted file mode 100644 index ad1de7b..0000000 --- a/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 886b511e0a225b1c4428c646534d7bcc65bd9e2a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 18:02:34 +0200 -Subject: [PATCH 14/56] migration: Make dirty_sync_count atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [13/50] ef3ae8cdd960e944ba9e73a53d54c9a5a55bb1ce (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 536b5a4e56ec67c958f46e7d46cbd5ac34e5a239) -Signed-off-by: Peter Xu ---- - migration/migration.c | 3 ++- - migration/ram.c | 13 +++++++------ - migration/ram.h | 2 +- - 3 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8f2847d298..8fca751050 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1148,7 +1148,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->normal = stat64_get(&ram_counters.normal); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; -- info->ram->dirty_sync_count = ram_counters.dirty_sync_count; -+ info->ram->dirty_sync_count = -+ stat64_get(&ram_counters.dirty_sync_count); - info->ram->dirty_sync_missed_zero_copy = - stat64_get(&ram_counters.dirty_sync_missed_zero_copy); - info->ram->postcopy_requests = ram_counters.postcopy_requests; -diff --git a/migration/ram.c b/migration/ram.c -index b1722b6071..3c13136559 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -764,7 
+764,7 @@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) - /* We don't care if this fails to allocate a new cache page - * as long as it updated an old one */ - cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, -- ram_counters.dirty_sync_count); -+ stat64_get(&ram_counters.dirty_sync_count)); - } - - #define ENCODING_FLAG_XBZRLE 0x1 -@@ -790,13 +790,13 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, - int encoded_len = 0, bytes_xbzrle; - uint8_t *prev_cached_page; - QEMUFile *file = pss->pss_channel; -+ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); - -- if (!cache_is_cached(XBZRLE.cache, current_addr, -- ram_counters.dirty_sync_count)) { -+ if (!cache_is_cached(XBZRLE.cache, current_addr, generation)) { - xbzrle_counters.cache_miss++; - if (!rs->last_stage) { - if (cache_insert(XBZRLE.cache, current_addr, *current_data, -- ram_counters.dirty_sync_count) == -1) { -+ generation) == -1) { - return -1; - } else { - /* update *current_data when the page has been -@@ -1209,7 +1209,7 @@ static void migration_bitmap_sync(RAMState *rs) - RAMBlock *block; - int64_t end_time; - -- ram_counters.dirty_sync_count++; -+ stat64_add(&ram_counters.dirty_sync_count, 1); - - if (!rs->time_last_bitmap_sync) { - rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -@@ -1246,7 +1246,8 @@ static void migration_bitmap_sync(RAMState *rs) - rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - if (migrate_use_events()) { -- qapi_event_send_migration_pass(ram_counters.dirty_sync_count); -+ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); -+ qapi_event_send_migration_pass(generation); - } - } - -diff --git a/migration/ram.h b/migration/ram.h -index bb52632424..8c0d07c43a 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -42,7 +42,7 @@ - */ - typedef struct { - int64_t dirty_pages_rate; -- int64_t dirty_sync_count; -+ Stat64 dirty_sync_count; - Stat64 
dirty_sync_missed_zero_copy; - Stat64 downtime_bytes; - Stat64 duplicate; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch deleted file mode 100644 index b7b0f60..0000000 --- a/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch +++ /dev/null @@ -1,92 +0,0 @@ -From e9ff20d7f7e6c2354f3696e8bca265e535eeb801 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:33:56 +0200 -Subject: [PATCH 11/56] migration: Make dirty_sync_missed_zero_copy atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [10/50] 041230abb087db0e7ffae02b4f85772490b805a0 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 4291823694fd8507831d26e2558d9cd0030841f7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/multifd.c | 2 +- - migration/ram.c | 5 ----- - migration/ram.h | 4 +--- - 4 files changed, 3 insertions(+), 10 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index ca68808b5c..645fb4b3c5 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1150,7 +1150,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; - info->ram->dirty_sync_missed_zero_copy = -- ram_counters.dirty_sync_missed_zero_copy; -+ stat64_get(&ram_counters.dirty_sync_missed_zero_copy); - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = page_size; - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); -diff --git 
a/migration/multifd.c b/migration/multifd.c -index 1c992abf53..903df2117b 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -576,7 +576,7 @@ static int multifd_zero_copy_flush(QIOChannel *c) - return -1; - } - if (ret == 1) { -- dirty_sync_missed_zero_copy(); -+ stat64_add(&ram_counters.dirty_sync_missed_zero_copy, 1); - } - - return ret; -diff --git a/migration/ram.c b/migration/ram.c -index 71320ed27a..93e0a48af4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -472,11 +472,6 @@ void ram_transferred_add(uint64_t bytes) - stat64_add(&ram_counters.transferred, bytes); - } - --void dirty_sync_missed_zero_copy(void) --{ -- ram_counters.dirty_sync_missed_zero_copy++; --} -- - struct MigrationOps { - int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss); - }; -diff --git a/migration/ram.h b/migration/ram.h -index ed70391317..2170c55e67 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -43,7 +43,7 @@ - typedef struct { - int64_t dirty_pages_rate; - int64_t dirty_sync_count; -- uint64_t dirty_sync_missed_zero_copy; -+ Stat64 dirty_sync_missed_zero_copy; - uint64_t downtime_bytes; - Stat64 duplicate; - Stat64 multifd_bytes; -@@ -114,6 +114,4 @@ void ram_write_tracking_prepare(void); - int ram_write_tracking_start(void); - void ram_write_tracking_stop(void); - --void dirty_sync_missed_zero_copy(void); -- - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch b/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch deleted file mode 100644 index 9b206bc..0000000 --- a/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 4c6af064277b5445b31db4a598e1c4402ba56452 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:38:11 +0200 -Subject: [PATCH 13/56] migration: Make downtime_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty 
failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [12/50] ebfc16aae8bc4a8c1fec431780a062950e6f50c4 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 296a4ac2aa63038b6b702f2ee8f0f93ae26727ae) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 3a68d93d69..8f2847d298 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1156,7 +1156,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; - info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); -- info->ram->downtime_bytes = ram_counters.downtime_bytes; -+ info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - - if (migrate_use_xbzrle()) { -diff --git a/migration/ram.c b/migration/ram.c -index 0b4693215e..b1722b6071 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -467,7 +467,7 @@ void ram_transferred_add(uint64_t bytes) - } else if (migration_in_postcopy()) { - stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { -- ram_counters.downtime_bytes += bytes; -+ stat64_add(&ram_counters.downtime_bytes, bytes); - } - stat64_add(&ram_counters.transferred, bytes); - } -diff --git a/migration/ram.h b/migration/ram.h -index a766b895fa..bb52632424 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -44,7 +44,7 @@ typedef struct { - int64_t dirty_pages_rate; - int64_t dirty_sync_count; - Stat64 dirty_sync_missed_zero_copy; -- uint64_t downtime_bytes; -+ Stat64 downtime_bytes; - Stat64 duplicate; - Stat64 
multifd_bytes; - Stat64 normal; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch b/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch deleted file mode 100644 index b315fdc..0000000 --- a/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch +++ /dev/null @@ -1,99 +0,0 @@ -From bfcc4bc8f60b541d545f1ea27b1ff156d8092d33 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 23 Nov 2022 20:36:56 +0100 -Subject: [PATCH 10/56] migration: Make multifd_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [9/50] c2bc6b173770a0ea81c3f9d850c583c651647070 (peterx/qemu-kvm) - -In the spirit of: - -commit 394d323bc3451e4d07f13341cb8817fac8dfbadd -Author: Peter Xu -Date: Tue Oct 11 17:55:51 2022 -0400 - - migration: Use atomic ops properly for page accountings - -Reviewed-by: David Edmondson -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit cf671116facf4e29d91fce9c9ffb535385ffac81) -Signed-off-by: Peter Xu ---- - migration/migration.c | 4 ++-- - migration/multifd.c | 4 ++-- - migration/ram.h | 2 +- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index a91704d35c..ca68808b5c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1153,7 +1153,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - ram_counters.dirty_sync_missed_zero_copy; - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = page_size; -- info->ram->multifd_bytes = ram_counters.multifd_bytes; -+ info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; - 
info->ram->precopy_bytes = ram_counters.precopy_bytes; - info->ram->downtime_bytes = ram_counters.downtime_bytes; -@@ -3780,7 +3780,7 @@ static MigThrError migration_detect_error(MigrationState *s) - static uint64_t migration_total_bytes(MigrationState *s) - { - return qemu_file_total_transferred(s->to_dst_file) + -- ram_counters.multifd_bytes; -+ stat64_get(&ram_counters.multifd_bytes); - } - - static void migration_calculate_complete(MigrationState *s) -diff --git a/migration/multifd.c b/migration/multifd.c -index 6ef3a27938..1c992abf53 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -432,9 +432,9 @@ static int multifd_send_pages(QEMUFile *f) - p->pages = pages; - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); -- ram_counters.multifd_bytes += transferred; - qemu_mutex_unlock(&p->mutex); - stat64_add(&ram_counters.transferred, transferred); -+ stat64_add(&ram_counters.multifd_bytes, transferred); - qemu_sem_post(&p->sem); - - return 1; -@@ -627,9 +627,9 @@ int multifd_send_sync_main(QEMUFile *f) - p->flags |= MULTIFD_FLAG_SYNC; - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); -- ram_counters.multifd_bytes += p->packet_len; - qemu_mutex_unlock(&p->mutex); - stat64_add(&ram_counters.transferred, p->packet_len); -+ stat64_add(&ram_counters.multifd_bytes, p->packet_len); - qemu_sem_post(&p->sem); - } - for (i = 0; i < migrate_multifd_channels(); i++) { -diff --git a/migration/ram.h b/migration/ram.h -index 7c026b5242..ed70391317 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -46,7 +46,7 @@ typedef struct { - uint64_t dirty_sync_missed_zero_copy; - uint64_t downtime_bytes; - Stat64 duplicate; -- uint64_t multifd_bytes; -+ Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; - int64_t postcopy_requests; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch b/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch 
deleted file mode 100644 index 894419a..0000000 --- a/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch +++ /dev/null @@ -1,69 +0,0 @@ -From e6ff4536a5e5f5bbfda370ecb525d0e066c3ab1c Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 18:04:59 +0200 -Subject: [PATCH 15/56] migration: Make postcopy_requests atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [14/50] d15c6052b77e7ded7bf34c66caa11bf86b75f2e8 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 3c764f9b2bc3e5eb5ed93ab45c2de6d599fef00f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 3 ++- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8fca751050..39501a0ed8 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1152,7 +1152,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - stat64_get(&ram_counters.dirty_sync_count); - info->ram->dirty_sync_missed_zero_copy = - stat64_get(&ram_counters.dirty_sync_missed_zero_copy); -- info->ram->postcopy_requests = ram_counters.postcopy_requests; -+ info->ram->postcopy_requests = -+ stat64_get(&ram_counters.postcopy_requests); - info->ram->page_size = page_size; - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; -diff --git a/migration/ram.c b/migration/ram.c -index 3c13136559..fe69ecaef4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2169,7 +2169,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) - RAMBlock *ramblock; - RAMState *rs = 
ram_state; - -- ram_counters.postcopy_requests++; -+ stat64_add(&ram_counters.postcopy_requests, 1); - RCU_READ_LOCK_GUARD(); - - if (!rbname) { -diff --git a/migration/ram.h b/migration/ram.h -index 8c0d07c43a..afa68521d7 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -49,7 +49,7 @@ typedef struct { - Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; -- int64_t postcopy_requests; -+ Stat64 postcopy_requests; - Stat64 precopy_bytes; - int64_t remaining; - Stat64 transferred; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch b/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch deleted file mode 100644 index 8e6c177..0000000 --- a/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 7e4d4316855f7f6556364eb16828f925b61c80d4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:36:48 +0200 -Subject: [PATCH 12/56] migration: Make precopy_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [11/50] 23bec49b4b8f4d23c2192b401416139e3ca13626 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit b013b5d1f32ef88457e66c7ce576f6475238f97f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 645fb4b3c5..3a68d93d69 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1155,7 +1155,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->page_size = page_size; - info->ram->multifd_bytes = 
stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; -- info->ram->precopy_bytes = ram_counters.precopy_bytes; -+ info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); - info->ram->downtime_bytes = ram_counters.downtime_bytes; - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - -diff --git a/migration/ram.c b/migration/ram.c -index 93e0a48af4..0b4693215e 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -463,7 +463,7 @@ RAMStats ram_counters; - void ram_transferred_add(uint64_t bytes) - { - if (runstate_is_running()) { -- ram_counters.precopy_bytes += bytes; -+ stat64_add(&ram_counters.precopy_bytes, bytes); - } else if (migration_in_postcopy()) { - stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { -diff --git a/migration/ram.h b/migration/ram.h -index 2170c55e67..a766b895fa 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -50,7 +50,7 @@ typedef struct { - Stat64 normal; - Stat64 postcopy_bytes; - int64_t postcopy_requests; -- uint64_t precopy_bytes; -+ Stat64 precopy_bytes; - int64_t remaining; - Stat64 transferred; - } RAMStats; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch b/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch deleted file mode 100644 index 0679e89..0000000 --- a/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch +++ /dev/null @@ -1,270 +0,0 @@ -From 5a87058eea6ee56f37fb454486c35baaf693d691 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 22 Feb 2023 15:56:45 +0100 -Subject: [PATCH 08/56] migration: Merge ram_counters and ram_atomic_counters -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 
-RH-Commit: [7/50] 90e395de66aa32b886cf151f7996a680190471f5 (peterx/qemu-kvm) - -Using MgrationStats as type for ram_counters mean that we didn't have -to re-declare each value in another struct. The need of atomic -counters have make us to create MigrationAtomicStats for this atomic -counters. - -Create RAMStats type which is a merge of MigrationStats and -MigrationAtomicStats removing unused members. - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu - ---- - -Fix typos found by David Edmondson - -(cherry picked from commit abce5fa16d126ed085ccf8a5b3fe61a1efa20994) -Signed-off-by: Peter Xu ---- - migration/migration.c | 8 ++++---- - migration/multifd.c | 4 ++-- - migration/ram.c | 39 ++++++++++++++++----------------------- - migration/ram.h | 28 +++++++++++++++------------- - 4 files changed, 37 insertions(+), 42 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 99f86bd6c2..a91704d35c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1140,12 +1140,12 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - size_t page_size = qemu_target_page_size(); - - info->ram = g_malloc0(sizeof(*info->ram)); -- info->ram->transferred = stat64_get(&ram_atomic_counters.transferred); -+ info->ram->transferred = stat64_get(&ram_counters.transferred); - info->ram->total = ram_bytes_total(); -- info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate); -+ info->ram->duplicate = stat64_get(&ram_counters.duplicate); - /* legacy value. 
It is not used anymore */ - info->ram->skipped = 0; -- info->ram->normal = stat64_get(&ram_atomic_counters.normal); -+ info->ram->normal = stat64_get(&ram_counters.normal); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; -@@ -1157,7 +1157,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->pages_per_second = s->pages_per_second; - info->ram->precopy_bytes = ram_counters.precopy_bytes; - info->ram->downtime_bytes = ram_counters.downtime_bytes; -- info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes); -+ info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - - if (migrate_use_xbzrle()) { - info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); -diff --git a/migration/multifd.c b/migration/multifd.c -index cbc0dfe39b..01fab01a92 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -433,7 +433,7 @@ static int multifd_send_pages(QEMUFile *f) - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); - ram_counters.multifd_bytes += transferred; -- stat64_add(&ram_atomic_counters.transferred, transferred); -+ stat64_add(&ram_counters.transferred, transferred); - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - -@@ -628,7 +628,7 @@ int multifd_send_sync_main(QEMUFile *f) - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); - ram_counters.multifd_bytes += p->packet_len; -- stat64_add(&ram_atomic_counters.transferred, p->packet_len); -+ stat64_add(&ram_counters.transferred, p->packet_len); - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - } -diff --git a/migration/ram.c b/migration/ram.c -index 0e68099bf9..71320ed27a 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -458,25 +458,18 @@ uint64_t ram_bytes_remaining(void) - 0; - } - --/* -- * NOTE: not all stats in ram_counters are used in 
reality. See comments -- * for struct MigrationAtomicStats. The ultimate result of ram migration -- * counters will be a merged version with both ram_counters and the atomic -- * fields in ram_atomic_counters. -- */ --MigrationStats ram_counters; --MigrationAtomicStats ram_atomic_counters; -+RAMStats ram_counters; - - void ram_transferred_add(uint64_t bytes) - { - if (runstate_is_running()) { - ram_counters.precopy_bytes += bytes; - } else if (migration_in_postcopy()) { -- stat64_add(&ram_atomic_counters.postcopy_bytes, bytes); -+ stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { - ram_counters.downtime_bytes += bytes; - } -- stat64_add(&ram_atomic_counters.transferred, bytes); -+ stat64_add(&ram_counters.transferred, bytes); - } - - void dirty_sync_missed_zero_copy(void) -@@ -756,7 +749,7 @@ void mig_throttle_counter_reset(void) - - rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - rs->num_dirty_pages_period = 0; -- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); -+ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - - /** -@@ -1130,8 +1123,8 @@ uint64_t ram_pagesize_summary(void) - - uint64_t ram_get_total_transferred_pages(void) - { -- return stat64_get(&ram_atomic_counters.normal) + -- stat64_get(&ram_atomic_counters.duplicate) + -+ return stat64_get(&ram_counters.normal) + -+ stat64_get(&ram_counters.duplicate) + - compression_counters.pages + xbzrle_counters.pages; - } - -@@ -1192,7 +1185,7 @@ static void migration_trigger_throttle(RAMState *rs) - MigrationState *s = migrate_get_current(); - uint64_t threshold = s->parameters.throttle_trigger_threshold; - uint64_t bytes_xfer_period = -- stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev; -+ stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; - uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; - uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; - -@@ -1255,7 +1248,7 @@ static 
void migration_bitmap_sync(RAMState *rs) - /* reset period counters */ - rs->time_last_bitmap_sync = end_time; - rs->num_dirty_pages_period = 0; -- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); -+ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - if (migrate_use_events()) { - qapi_event_send_migration_pass(ram_counters.dirty_sync_count); -@@ -1331,7 +1324,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, - int len = save_zero_page_to_file(pss, f, block, offset); - - if (len) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - ram_transferred_add(len); - return 1; - } -@@ -1368,9 +1361,9 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - } - - if (bytes_xmit > 0) { -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - } else if (bytes_xmit == 0) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - } - - return true; -@@ -1402,7 +1395,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, - qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); - } - ram_transferred_add(TARGET_PAGE_SIZE); -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - return 1; - } - -@@ -1458,7 +1451,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, - if (multifd_queue_page(file, block, offset) < 0) { - return -1; - } -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - - return 1; - } -@@ -1497,7 +1490,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) - ram_transferred_add(bytes_xmit); - - if (param->zero_page) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - return; - } - -@@ -2632,9 +2625,9 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - uint64_t pages = size / TARGET_PAGE_SIZE; 
- - if (zero) { -- stat64_add(&ram_atomic_counters.duplicate, pages); -+ stat64_add(&ram_counters.duplicate, pages); - } else { -- stat64_add(&ram_atomic_counters.normal, pages); -+ stat64_add(&ram_counters.normal, pages); - ram_transferred_add(size); - qemu_file_credit_transfer(f, size); - } -diff --git a/migration/ram.h b/migration/ram.h -index 81cbb0947c..7c026b5242 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -35,25 +35,27 @@ - #include "qemu/stats64.h" - - /* -- * These are the migration statistic counters that need to be updated using -- * atomic ops (can be accessed by more than one thread). Here since we -- * cannot modify MigrationStats directly to use Stat64 as it was defined in -- * the QAPI scheme, we define an internal structure to hold them, and we -- * propagate the real values when QMP queries happen. -- * -- * IOW, the corresponding fields within ram_counters on these specific -- * fields will be always zero and not being used at all; they're just -- * placeholders to make it QAPI-compatible. -+ * These are the ram migration statistic counters. It is loosely -+ * based on MigrationStats. We change to Stat64 any counter that -+ * needs to be updated using atomic ops (can be accessed by more than -+ * one thread). 
- */ - typedef struct { -- Stat64 transferred; -+ int64_t dirty_pages_rate; -+ int64_t dirty_sync_count; -+ uint64_t dirty_sync_missed_zero_copy; -+ uint64_t downtime_bytes; - Stat64 duplicate; -+ uint64_t multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; --} MigrationAtomicStats; -+ int64_t postcopy_requests; -+ uint64_t precopy_bytes; -+ int64_t remaining; -+ Stat64 transferred; -+} RAMStats; - --extern MigrationAtomicStats ram_atomic_counters; --extern MigrationStats ram_counters; -+extern RAMStats ram_counters; - extern XBZRLECacheStats xbzrle_counters; - extern CompressionStats compression_counters; - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Minor-control-flow-simplification.patch b/SOURCES/kvm-migration-Minor-control-flow-simplification.patch deleted file mode 100644 index a0dbdd9..0000000 --- a/SOURCES/kvm-migration-Minor-control-flow-simplification.patch +++ /dev/null @@ -1,52 +0,0 @@ -From c3bc974ea4b5186a76daa433209c1209d94dd0b7 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Thu, 20 Apr 2023 09:35:51 -0500 -Subject: [PATCH 2/2] migration: Minor control flow simplification - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [2/2] 5afd8c25d6f14bdb2a380ecc77bc6c2f2a26df87 (ebblake/centos-qemu-kvm) - -No need to declare a temporary variable. 
- -Suggested-by: Juan Quintela -Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better") -Signed-off-by: Eric Blake -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 5d39f44d7ac5c63f53d4d0900ceba9521bc27e49) ---- - migration/migration.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index cb0d42c061..08007cef4e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3436,7 +3436,6 @@ static void migration_completion(MigrationState *s) - ret = global_state_store(); - - if (!ret) { -- bool inactivate = !migrate_colo_enabled(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - trace_migration_completion_vm_stop(ret); - if (ret >= 0) { -@@ -3444,10 +3443,10 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -- s->block_inactive = inactivate; -+ s->block_inactive = !migrate_colo_enabled(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, -- inactivate); -+ s->block_inactive); - } - } - qemu_mutex_unlock_iothread(); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch b/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch deleted file mode 100644 index 24dcb16..0000000 --- a/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 1f5232d611ecaaf61bcac151e7d90b8b452ac161 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:17:23 +0100 -Subject: [PATCH 43/56] migration: Move migrate_announce_params() to option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás 
-RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [42/50] 541be7adc7f81c269058485aef5b14e787b2efe6 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas - ---- - -Fix extra whitespace (fabiano) - -(cherry picked from commit 2682c4eea72c621dfd0fb0151cbd758e81d1bdff) -Signed-off-by: Peter Xu ---- - migration/migration.c | 14 -------------- - migration/options.c | 17 +++++++++++++++++ - 2 files changed, 17 insertions(+), 14 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 724e841eb9..f27ce30be2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -954,20 +954,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - return params; - } - --AnnounceParameters *migrate_announce_params(void) --{ -- static AnnounceParameters ap; -- -- MigrationState *s = migrate_get_current(); -- -- ap.initial = s->parameters.announce_initial; -- ap.max = s->parameters.announce_max; -- ap.rounds = s->parameters.announce_rounds; -- ap.step = s->parameters.announce_step; -- -- return &ap; --} -- - /* - * Return true if we're already in the middle of a migration - * (i.e.
any of the active or setup states) -diff --git a/migration/options.c b/migration/options.c -index 2cb04fbbd1..418aafac64 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -16,6 +16,7 @@ - #include "qapi/qapi-commands-migration.h" - #include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" -+#include "migration/misc.h" - #include "migration.h" - #include "ram.h" - #include "options.h" -@@ -589,3 +590,19 @@ uint64_t migrate_xbzrle_cache_size(void) - - return s->parameters.xbzrle_cache_size; - } -+ -+/* parameters helpers */ -+ -+AnnounceParameters *migrate_announce_params(void) -+{ -+ static AnnounceParameters ap; -+ -+ MigrationState *s = migrate_get_current(); -+ -+ ap.initial = s->parameters.announce_initial; -+ ap.max = s->parameters.announce_max; -+ ap.rounds = s->parameters.announce_rounds; -+ ap.step = s->parameters.announce_step; -+ -+ return &ap; -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch deleted file mode 100644 index 0e33c4c..0000000 --- a/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 9c4f8d869f5bbdd07381f6baad2ed755b07d03f4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:25:44 +0100 -Subject: [PATCH 36/56] migration: Move migrate_cap_set() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [35/50] d0cd6b8e9cf0534a56795d94c3da18622fa10ad7 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit f80196b772ddeeb07d3d80d5c8382cb5d1063fa2) -Signed-off-by: Peter Xu ---- - migration/migration.c | 20
-------------------- - migration/options.c | 21 +++++++++++++++++++++ - migration/options.h | 1 + - 3 files changed, 22 insertions(+), 20 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 369cd91796..880a51210e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1666,26 +1666,6 @@ void migrate_set_state(int *state, int old_state, int new_state) - } - } - --static bool migrate_cap_set(int cap, bool value, Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- bool new_caps[MIGRATION_CAPABILITY__MAX]; -- -- if (migration_is_running(s->state)) { -- error_setg(errp, QERR_MIGRATION_ACTIVE); -- return false; -- } -- -- memcpy(new_caps, s->capabilities, sizeof(new_caps)); -- new_caps[cap] = value; -- -- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -- return false; -- } -- s->capabilities[cap] = value; -- return true; --} -- - static void migrate_set_block_incremental(MigrationState *s, bool value) - { - s->parameters.block_incremental = value; -diff --git a/migration/options.c b/migration/options.c -index 4cbe77e35a..f3b2d6e482 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -14,6 +14,7 @@ - #include "qemu/osdep.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" -+#include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" - #include "migration.h" - #include "ram.h" -@@ -392,6 +393,26 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - return true; - } - -+bool migrate_cap_set(int cap, bool value, Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; -+ -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return false; -+ } -+ -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ new_caps[cap] = value; -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return false; -+ } -+ s->capabilities[cap] = value; -+ return true; -+} 
-+ - MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - { - MigrationCapabilityStatusList *head = NULL, **tail = &head; -diff --git a/migration/options.h b/migration/options.h -index e779f14161..5979e4ff90 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -41,5 +41,6 @@ bool migrate_zero_copy_send(void); - /* capabilities helpers */ - - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); -+bool migrate_cap_set(int cap, bool value, Error **errp); - - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch deleted file mode 100644 index 0d6fa08..0000000 --- a/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch +++ /dev/null @@ -1,458 +0,0 @@ -From 3af7c7aaf7407ec14c19e54d52a2229ce4dbb7c5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:05:53 +0100 -Subject: [PATCH 33/56] migration: Move migrate_caps_check() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [32/50] 12999471063d97fffb2b04c6dcb80083b902f963 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 77608706459bd197e25ac1ef54591b9f8a0b46f8) -Signed-off-by: Peter Xu ---- - migration/migration.c | 190 ----------------------------------------- - migration/options.c | 192 ++++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 4 + - 3 files changed, 196 insertions(+), 190 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f7facecd66..d9e30ca918 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -136,39 
+136,6 @@ enum mig_rp_message_type { - MIG_RP_MSG_MAX - }; - --/* Migration capabilities set */ --struct MigrateCapsSet { -- int size; /* Capability set size */ -- MigrationCapability caps[]; /* Variadic array of capabilities */ --}; --typedef struct MigrateCapsSet MigrateCapsSet; -- --/* Define and initialize MigrateCapsSet */ --#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \ -- MigrateCapsSet _name = { \ -- .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ -- .caps = { __VA_ARGS__ } \ -- } -- --/* Background-snapshot compatibility check list */ --static const --INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, -- MIGRATION_CAPABILITY_POSTCOPY_RAM, -- MIGRATION_CAPABILITY_DIRTY_BITMAPS, -- MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, -- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, -- MIGRATION_CAPABILITY_RETURN_PATH, -- MIGRATION_CAPABILITY_MULTIFD, -- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, -- MIGRATION_CAPABILITY_AUTO_CONVERGE, -- MIGRATION_CAPABILITY_RELEASE_RAM, -- MIGRATION_CAPABILITY_RDMA_PIN_ALL, -- MIGRATION_CAPABILITY_COMPRESS, -- MIGRATION_CAPABILITY_XBZRLE, -- MIGRATION_CAPABILITY_X_COLO, -- MIGRATION_CAPABILITY_VALIDATE_UUID, -- MIGRATION_CAPABILITY_ZERO_COPY_SEND); -- - /* When we add fault tolerance, we could have several - migrations at once. For now we don't need to add - dynamic creation of migration */ -@@ -1235,163 +1202,6 @@ static void fill_source_migration_info(MigrationInfo *info) - info->status = state; - } - --typedef enum WriteTrackingSupport { -- WT_SUPPORT_UNKNOWN = 0, -- WT_SUPPORT_ABSENT, -- WT_SUPPORT_AVAILABLE, -- WT_SUPPORT_COMPATIBLE --} WriteTrackingSupport; -- --static --WriteTrackingSupport migrate_query_write_tracking(void) --{ -- /* Check if kernel supports required UFFD features */ -- if (!ram_write_tracking_available()) { -- return WT_SUPPORT_ABSENT; -- } -- /* -- * Check if current memory configuration is -- * compatible with required UFFD features. 
-- */ -- if (!ram_write_tracking_compatible()) { -- return WT_SUPPORT_AVAILABLE; -- } -- -- return WT_SUPPORT_COMPATIBLE; --} -- --/** -- * @migration_caps_check - check capability compatibility -- * -- * @old_caps: old capability list -- * @new_caps: new capability list -- * @errp: set *errp if the check failed, with reason -- * -- * Returns true if check passed, otherwise false. -- */ --static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) --{ -- MigrationIncomingState *mis = migration_incoming_get_current(); -- --#ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { -- error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -- "block migration"); -- error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -- return false; -- } --#endif -- --#ifndef CONFIG_REPLICATION -- if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { -- error_setg(errp, "QEMU compiled without replication module" -- " can't enable COLO"); -- error_append_hint(errp, "Please enable replication before COLO.\n"); -- return false; -- } --#endif -- -- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -- /* This check is reasonably expensive, so only when it's being -- * set the first time, also it's only the destination that needs -- * special support. 
-- */ -- if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -- runstate_check(RUN_STATE_INMIGRATE) && -- !postcopy_ram_supported_by_host(mis)) { -- /* postcopy_ram_supported_by_host will have emitted a more -- * detailed message -- */ -- error_setg(errp, "Postcopy is not supported"); -- return false; -- } -- -- if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -- error_setg(errp, "Postcopy is not compatible with ignore-shared"); -- return false; -- } -- } -- -- if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -- WriteTrackingSupport wt_support; -- int idx; -- /* -- * Check if 'background-snapshot' capability is supported by -- * host kernel and compatible with guest memory configuration. -- */ -- wt_support = migrate_query_write_tracking(); -- if (wt_support < WT_SUPPORT_AVAILABLE) { -- error_setg(errp, "Background-snapshot is not supported by host kernel"); -- return false; -- } -- if (wt_support < WT_SUPPORT_COMPATIBLE) { -- error_setg(errp, "Background-snapshot is not compatible " -- "with guest memory configuration"); -- return false; -- } -- -- /* -- * Check if there are any migration capabilities -- * incompatible with 'background-snapshot'. 
-- */ -- for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { -- int incomp_cap = check_caps_background_snapshot.caps[idx]; -- if (new_caps[incomp_cap]) { -- error_setg(errp, -- "Background-snapshot is not compatible with %s", -- MigrationCapability_str(incomp_cap)); -- return false; -- } -- } -- } -- --#ifdef CONFIG_LINUX -- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -- (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -- new_caps[MIGRATION_CAPABILITY_COMPRESS] || -- new_caps[MIGRATION_CAPABILITY_XBZRLE] || -- migrate_multifd_compression() || -- migrate_use_tls())) { -- error_setg(errp, -- "Zero copy only available for non-compressed non-TLS multifd migration"); -- return false; -- } --#else -- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -- error_setg(errp, -- "Zero copy currently only available on Linux"); -- return false; -- } --#endif -- -- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -- if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -- error_setg(errp, "Postcopy preempt requires postcopy-ram"); -- return false; -- } -- -- /* -- * Preempt mode requires urgent pages to be sent in separate -- * channel, OTOH compression logic will disorder all pages into -- * different compression channels, which is not compatible with the -- * preempt assumptions on channel assignments. 
-- */ -- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -- error_setg(errp, "Postcopy preempt not compatible with compress"); -- return false; -- } -- } -- -- if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -- error_setg(errp, "Multifd is not compatible with compress"); -- return false; -- } -- } -- -- return true; --} -- - static void fill_destination_migration_info(MigrationInfo *info) - { - MigrationIncomingState *mis = migration_incoming_get_current(); -diff --git a/migration/options.c b/migration/options.c -index 9c9b8e5863..367c930f46 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,7 +12,10 @@ - */ - - #include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "sysemu/runstate.h" - #include "migration.h" -+#include "ram.h" - #include "options.h" - - bool migrate_auto_converge(void) -@@ -198,3 +201,192 @@ bool migrate_zero_copy_send(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -+typedef enum WriteTrackingSupport { -+ WT_SUPPORT_UNKNOWN = 0, -+ WT_SUPPORT_ABSENT, -+ WT_SUPPORT_AVAILABLE, -+ WT_SUPPORT_COMPATIBLE -+} WriteTrackingSupport; -+ -+static -+WriteTrackingSupport migrate_query_write_tracking(void) -+{ -+ /* Check if kernel supports required UFFD features */ -+ if (!ram_write_tracking_available()) { -+ return WT_SUPPORT_ABSENT; -+ } -+ /* -+ * Check if current memory configuration is -+ * compatible with required UFFD features. -+ */ -+ if (!ram_write_tracking_compatible()) { -+ return WT_SUPPORT_AVAILABLE; -+ } -+ -+ return WT_SUPPORT_COMPATIBLE; -+} -+ -+/* Migration capabilities set */ -+struct MigrateCapsSet { -+ int size; /* Capability set size */ -+ MigrationCapability caps[]; /* Variadic array of capabilities */ -+}; -+typedef struct MigrateCapsSet MigrateCapsSet; -+ -+/* Define and initialize MigrateCapsSet */ -+#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) 
\ -+ MigrateCapsSet _name = { \ -+ .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ -+ .caps = { __VA_ARGS__ } \ -+ } -+ -+/* Background-snapshot compatibility check list */ -+static const -+INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, -+ MIGRATION_CAPABILITY_POSTCOPY_RAM, -+ MIGRATION_CAPABILITY_DIRTY_BITMAPS, -+ MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, -+ MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, -+ MIGRATION_CAPABILITY_RETURN_PATH, -+ MIGRATION_CAPABILITY_MULTIFD, -+ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, -+ MIGRATION_CAPABILITY_AUTO_CONVERGE, -+ MIGRATION_CAPABILITY_RELEASE_RAM, -+ MIGRATION_CAPABILITY_RDMA_PIN_ALL, -+ MIGRATION_CAPABILITY_COMPRESS, -+ MIGRATION_CAPABILITY_XBZRLE, -+ MIGRATION_CAPABILITY_X_COLO, -+ MIGRATION_CAPABILITY_VALIDATE_UUID, -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND); -+ -+/** -+ * @migration_caps_check - check capability compatibility -+ * -+ * @old_caps: old capability list -+ * @new_caps: new capability list -+ * @errp: set *errp if the check failed, with reason -+ * -+ * Returns true if check passed, otherwise false. 
-+ */ -+bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) -+{ -+ MigrationIncomingState *mis = migration_incoming_get_current(); -+ -+#ifndef CONFIG_LIVE_BLOCK_MIGRATION -+ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { -+ error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -+ "block migration"); -+ error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -+ return false; -+ } -+#endif -+ -+#ifndef CONFIG_REPLICATION -+ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { -+ error_setg(errp, "QEMU compiled without replication module" -+ " can't enable COLO"); -+ error_append_hint(errp, "Please enable replication before COLO.\n"); -+ return false; -+ } -+#endif -+ -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ /* This check is reasonably expensive, so only when it's being -+ * set the first time, also it's only the destination that needs -+ * special support. -+ */ -+ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -+ runstate_check(RUN_STATE_INMIGRATE) && -+ !postcopy_ram_supported_by_host(mis)) { -+ /* postcopy_ram_supported_by_host will have emitted a more -+ * detailed message -+ */ -+ error_setg(errp, "Postcopy is not supported"); -+ return false; -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -+ error_setg(errp, "Postcopy is not compatible with ignore-shared"); -+ return false; -+ } -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -+ WriteTrackingSupport wt_support; -+ int idx; -+ /* -+ * Check if 'background-snapshot' capability is supported by -+ * host kernel and compatible with guest memory configuration. 
-+ */ -+ wt_support = migrate_query_write_tracking(); -+ if (wt_support < WT_SUPPORT_AVAILABLE) { -+ error_setg(errp, "Background-snapshot is not supported by host kernel"); -+ return false; -+ } -+ if (wt_support < WT_SUPPORT_COMPATIBLE) { -+ error_setg(errp, "Background-snapshot is not compatible " -+ "with guest memory configuration"); -+ return false; -+ } -+ -+ /* -+ * Check if there are any migration capabilities -+ * incompatible with 'background-snapshot'. -+ */ -+ for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { -+ int incomp_cap = check_caps_background_snapshot.caps[idx]; -+ if (new_caps[incomp_cap]) { -+ error_setg(errp, -+ "Background-snapshot is not compatible with %s", -+ MigrationCapability_str(incomp_cap)); -+ return false; -+ } -+ } -+ } -+ -+#ifdef CONFIG_LINUX -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -+ (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -+ new_caps[MIGRATION_CAPABILITY_COMPRESS] || -+ new_caps[MIGRATION_CAPABILITY_XBZRLE] || -+ migrate_multifd_compression() || -+ migrate_use_tls())) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#else -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -+ error_setg(errp, -+ "Zero copy currently only available on Linux"); -+ return false; -+ } -+#endif -+ -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -+ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ error_setg(errp, "Postcopy preempt requires postcopy-ram"); -+ return false; -+ } -+ -+ /* -+ * Preempt mode requires urgent pages to be sent in separate -+ * channel, OTOH compression logic will disorder all pages into -+ * different compression channels, which is not compatible with the -+ * preempt assumptions on channel assignments. 
-+ */ -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -+ error_setg(errp, "Postcopy preempt not compatible with compress"); -+ return false; -+ } -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -+ error_setg(errp, "Multifd is not compatible with compress"); -+ return false; -+ } -+ } -+ -+ return true; -+} -diff --git a/migration/options.h b/migration/options.h -index 25c002b37a..e779f14161 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -38,4 +38,8 @@ bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -+/* capabilities helpers */ -+ -+bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); -+ - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch deleted file mode 100644 index 47c6f83..0000000 --- a/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 13da9060fa2dfc666cd6f4b9bc85b7cee0fef45e Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:00:16 +0100 -Subject: [PATCH 24/56] migration: Move migrate_colo_enabled() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [23/50] 4809b1091edee38bd222af41b6313133705785c7 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_colo() to be -consistent with all other capabilities. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5e8046445575dc5879e63c5d07af893d174813d0) -Signed-off-by: Peter Xu ---- - migration/migration.c | 16 +++++----------- - migration/migration.h | 1 - - migration/options.c | 6 ++++++ - migration/options.h | 1 + - 4 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 66ea55be06..59ee0ef82b 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2411,7 +2411,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - } - - if (blk || blk_inc) { -- if (migrate_colo_enabled()) { -+ if (migrate_colo()) { - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -@@ -3304,7 +3304,7 @@ static void migration_completion(MigrationState *s) - * have done so in order to remember to reactivate - * them if migration fails or is cancelled. - */ -- s->block_inactive = !migrate_colo_enabled(); -+ s->block_inactive = !migrate_colo(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - s->block_inactive); -@@ -3357,7 +3357,7 @@ static void migration_completion(MigrationState *s) - goto fail; - } - -- if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { -+ if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { - /* COLO does not support postcopy */ - migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_COLO); -@@ -3435,12 +3435,6 @@ fail: - MIGRATION_STATUS_FAILED); - } - --bool migrate_colo_enabled(void) --{ -- MigrationState *s = migrate_get_current(); -- return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; --} -- - typedef enum MigThrError { - /* No error detected */ - MIG_THR_ERR_NONE = 0, -@@ -3771,7 +3765,7 @@ static void migration_iteration_finish(MigrationState *s) - runstate_set(RUN_STATE_POSTMIGRATE); - break; - case MIGRATION_STATUS_COLO: 
-- if (!migrate_colo_enabled()) { -+ if (!migrate_colo()) { - error_report("%s: critical error: calling COLO code without " - "COLO enabled", __func__); - } -@@ -3967,7 +3961,7 @@ static void *migration_thread(void *opaque) - qemu_savevm_send_postcopy_advise(s->to_dst_file); - } - -- if (migrate_colo_enabled()) { -+ if (migrate_colo()) { - /* Notify migration destination that we enable COLO */ - qemu_savevm_send_colo_enable(s->to_dst_file); - } -diff --git a/migration/migration.h b/migration/migration.h -index a25fed6ef0..42f0c68b6f 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -463,7 +463,6 @@ bool migrate_use_zero_copy_send(void); - int migrate_use_tls(void); - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); --bool migrate_colo_enabled(void); - - bool migrate_use_block(void); - bool migrate_use_block_incremental(void); -diff --git a/migration/options.c b/migration/options.c -index 88a9a45913..bd33c5da0a 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,6 +33,12 @@ bool migrate_background_snapshot(void) - return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - -+bool migrate_colo(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; -+} -+ - bool migrate_dirty_bitmaps(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 0dfa0af245..2a0ee61ff8 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -18,6 +18,7 @@ - - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); -+bool migrate_colo(void); - bool migrate_dirty_bitmaps(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch deleted file mode 100644 index 892ec9e..0000000 --- 
a/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 710fe195a3c13ffe96795a7a2b550c00319997ea Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:44:20 +0100 -Subject: [PATCH 47/56] migration: Move migrate_postcopy() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [46/50] a4f3455b3524a331f44b481bf7a79318aef5abaa (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit f774fde5d4e97cbfc64dab6622c2c53c5fe5c9fe) -Signed-off-by: Peter Xu ---- - migration/migration.c | 5 ----- - migration/migration.h | 2 -- - migration/options.c | 8 ++++++++ - migration/options.h | 9 +++++++++ - 4 files changed, 17 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f27ce30be2..46a5ea4d42 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2245,11 +2245,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --bool migrate_postcopy(void) --{ -- return migrate_postcopy_ram() || migrate_dirty_bitmaps(); --} -- - int migrate_use_tls(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 3ae938b19c..dcf906868d 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); - bool migration_in_postcopy(void); - MigrationState *migrate_get_current(void); - --bool migrate_postcopy(void); -- - int migrate_use_tls(void); - - uint64_t ram_get_total_transferred_pages(void); -diff --git a/migration/options.c b/migration/options.c -index 615534c151..8bd2d949ae 100644 ---- 
a/migration/options.c -+++ b/migration/options.c -@@ -204,6 +204,14 @@ bool migrate_zero_copy_send(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -+ -+/* pseudo capabilities */ -+ -+bool migrate_postcopy(void) -+{ -+ return migrate_postcopy_ram() || migrate_dirty_bitmaps(); -+} -+ - typedef enum WriteTrackingSupport { - WT_SUPPORT_UNKNOWN = 0, - WT_SUPPORT_ABSENT, -diff --git a/migration/options.h b/migration/options.h -index 99f6bbd7a1..093bc907a1 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -38,6 +38,15 @@ bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -+/* -+ * pseudo capabilities -+ * -+ * These are functions that are used in a similar way to capabilities -+ * check, but they are not a capability. -+ */ -+ -+bool migrate_postcopy(void); -+ - /* capabilities helpers */ - - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch deleted file mode 100644 index f7cb338..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 276877a71778a5cef0dc5bc843e2679f0fdabb77 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:23:57 +0100 -Subject: [PATCH 30/56] migration: Move migrate_use_block() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [29/50] fcaeb0e07cf828f3cd0d115515b30d913525a0a2 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_block() -to be consistent with all other capabilities. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 9d4b1e5f22a838285ebeb8f0eb7cc8df1161998f) -Signed-off-by: Peter Xu ---- - migration/block.c | 2 +- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/savevm.c | 2 +- - 6 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/migration/block.c b/migration/block.c -index 4b167fa5cf..f0977217cf 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -1001,7 +1001,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) - - static bool block_is_active(void *opaque) - { -- return migrate_use_block(); -+ return migrate_block(); - } - - static SaveVMHandlers savevm_block_handlers = { -diff --git a/migration/migration.c b/migration/migration.c -index a4ede4294e..96f82bd165 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2415,7 +2415,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -- if (migrate_use_block() || migrate_use_block_incremental()) { -+ if (migrate_block() || migrate_use_block_incremental()) { - error_setg(errp, "Command options are incompatible with " - "current migration capabilities"); - return false; -@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) - return s->parameters.max_postcopy_bandwidth; - } - --bool migrate_use_block(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; --} -- - bool migrate_use_return_path(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index e2bb5b1e2f..d4b68b08a5 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -457,7 +457,6 @@ int migrate_multifd_zstd_level(void); - int migrate_use_tls(void); - uint64_t 
migrate_xbzrle_cache_size(void); - --bool migrate_use_block(void); - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - bool migrate_use_return_path(void); -diff --git a/migration/options.c b/migration/options.c -index 25264c500e..fe1eadeed6 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,6 +33,15 @@ bool migrate_background_snapshot(void) - return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - -+bool migrate_block(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; -+} -+ - bool migrate_colo(void) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.h b/migration/options.h -index 8f76a88329..e985a5233e 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -18,6 +18,7 @@ - - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); -+bool migrate_block(void); - bool migrate_colo(void); - bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); -diff --git a/migration/savevm.c b/migration/savevm.c -index ebcf571e37..9671211339 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1612,7 +1612,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - return -EINVAL; - } - -- if (migrate_use_block()) { -+ if (migrate_block()) { - error_setg(errp, "Block migration and snapshots are incompatible"); - return -EINVAL; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch b/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch deleted file mode 100644 index 3f20289..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch +++ /dev/null @@ -1,121 +0,0 @@ -From def66503f4ccb97cf8029f88efe8e955edc8d32f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:49:47 +0100 -Subject: [PATCH 39/56] migration: Move migrate_use_block_incremental() to - option.c 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [38/50] 961fda6464df3384fbcee88c726b56a33c26e14e (peterx/qemu-kvm) - -To be consistent with every other parameter, rename to -migrate_block_incremental(). - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 6f8be7080a1f79bf3832cf798fba1697c409c597) -Signed-off-by: Peter Xu ---- - migration/block.c | 2 +- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/block.c b/migration/block.c -index f0977217cf..6d532ac7a2 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -417,7 +417,7 @@ static int init_blk_migration(QEMUFile *f) - bmds->bulk_completed = 0; - bmds->total_sectors = sectors; - bmds->completed_sectors = 0; -- bmds->shared_base = migrate_use_block_incremental(); -+ bmds->shared_base = migrate_block_incremental(); - - assert(i < num_bs); - bmds_bs[i].bmds = bmds; -diff --git a/migration/migration.c b/migration/migration.c -index 78bca9a93f..724e841eb9 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2157,7 +2157,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -- if (migrate_block() || migrate_use_block_incremental()) { -+ if (migrate_block() || migrate_block_incremental()) { - error_setg(errp, "Command options are incompatible with " - "current migration capabilities"); - return false; -@@ -2273,15 +2273,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && 
*s->parameters.tls_creds; - } - --bool migrate_use_block_incremental(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.block_incremental; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index 8451e5f2fe..86051af132 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -451,7 +451,6 @@ bool migrate_postcopy(void); - - int migrate_use_tls(void); - --bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - - uint64_t ram_get_total_transferred_pages(void); -diff --git a/migration/options.c b/migration/options.c -index 8d15be858c..2b6d88b4b9 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -463,6 +463,15 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - - /* parameters */ - -+bool migrate_block_incremental(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.block_incremental; -+} -+ - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index b24ee92283..96d5a8e6e4 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -45,6 +45,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); - - /* parameters */ - -+bool migrate_block_incremental(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch deleted file mode 100644 index 8b74183..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch +++ /dev/null @@ -1,183 +0,0 @@ -From ae183bfc9d7b001d3c4929556b095a76203bc08d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:03:48 +0100 -Subject: 
[PATCH 25/56] migration: Move migrate_use_compression() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [24/50] 126b865f51bd4a1ae3a46411fdcd59033bfc5376 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_compress() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit a7a94d14358dd7b445e20c2f26218ff987747642) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 16 ++++++++-------- - 5 files changed, 19 insertions(+), 19 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 59ee0ef82b..c6e32555a8 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1133,7 +1133,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->xbzrle_cache->overflow = xbzrle_counters.overflow; - } - -- if (migrate_use_compression()) { -+ if (migrate_compress()) { - info->compression = g_malloc0(sizeof(*info->compression)); - info->compression->pages = compression_counters.pages; - info->compression->busy = compression_counters.busy; -@@ -2522,15 +2522,6 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --bool migrate_use_compression(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; --} -- - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 42f0c68b6f..77aa91c840 100644 ---- 
a/migration/migration.h -+++ b/migration/migration.h -@@ -471,7 +471,6 @@ bool migrate_use_return_path(void); - - uint64_t ram_get_total_transferred_pages(void); - --bool migrate_use_compression(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -diff --git a/migration/options.c b/migration/options.c -index bd33c5da0a..fa7a13d3dc 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -39,6 +39,15 @@ bool migrate_colo(void) - return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - -+bool migrate_compress(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; -+} -+ - bool migrate_dirty_bitmaps(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 2a0ee61ff8..da2193fd94 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -19,6 +19,7 @@ - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); - bool migrate_colo(void); -+bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); -diff --git a/migration/ram.c b/migration/ram.c -index 912ccd89fa..d050d0c5fd 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -586,7 +586,7 @@ static void compress_threads_save_cleanup(void) - { - int i, thread_count; - -- if (!migrate_use_compression() || !comp_param) { -+ if (!migrate_compress() || !comp_param) { - return; - } - -@@ -625,7 +625,7 @@ static int compress_threads_save_setup(void) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - thread_count = migrate_compress_threads(); -@@ -1155,7 +1155,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - rs->xbzrle_bytes_prev = xbzrle_counters.bytes; - } - -- if (migrate_use_compression()) { -+ if (migrate_compress()) { - 
compression_counters.busy_rate = (double)(compression_counters.busy - - rs->compress_thread_busy_prev) / page_count; - rs->compress_thread_busy_prev = compression_counters.busy; -@@ -2270,7 +2270,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) - - static bool save_page_use_compression(RAMState *rs) - { -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return false; - } - -@@ -3734,7 +3734,7 @@ static int wait_for_decompress_done(void) - { - int idx, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - -@@ -3753,7 +3753,7 @@ static void compress_threads_load_cleanup(void) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return; - } - thread_count = migrate_decompress_threads(); -@@ -3794,7 +3794,7 @@ static int compress_threads_load_setup(QEMUFile *f) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - -@@ -4260,7 +4260,7 @@ static int ram_load_precopy(QEMUFile *f) - int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; - /* ADVISE is earlier, it shows the source has the postcopy capability on */ - bool postcopy_advised = migration_incoming_postcopy_advised(); -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch deleted file mode 100644 index 41e05c3..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 940f1eb4347c72edb3e1abc02c8d7e7c95753dcf Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:08:09 +0100 -Subject: [PATCH 26/56] migration: Move migrate_use_events() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [25/50] b3acd949af2a0fae18061d360e4f51dc12d32c6c (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_events() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b890902c9c025b87d02e718eec3090fd3525ab18) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c6e32555a8..032cd5c050 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -353,7 +353,7 @@ void migration_incoming_state_destroy(void) - - static void migrate_generate_event(int new_state) - { -- if (migrate_use_events()) { -+ if (migrate_events()) { - qapi_event_send_migration(new_state); - } - } -@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_use_events(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; --} -- - bool migrate_use_multifd(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 77aa91c840..bd06520c19 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -475,7 +475,6 @@ int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); --bool migrate_use_events(void); - - /* Sending on the return path - generic and then for each message type */ - void 
migrate_send_rp_shut(MigrationIncomingState *mis, -diff --git a/migration/options.c b/migration/options.c -index fa7a13d3dc..d2219ee0e4 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -57,6 +57,15 @@ bool migrate_dirty_bitmaps(void) - return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - -+bool migrate_events(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; -+} -+ - bool migrate_ignore_shared(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index da2193fd94..b998024eba 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -21,6 +21,7 @@ bool migrate_background_snapshot(void); - bool migrate_colo(void); - bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); -+bool migrate_events(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); - bool migrate_pause_before_switchover(void); -diff --git a/migration/ram.c b/migration/ram.c -index d050d0c5fd..ee454a3849 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1246,7 +1246,7 @@ static void migration_bitmap_sync(RAMState *rs) - rs->num_dirty_pages_period = 0; - rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } -- if (migrate_use_events()) { -+ if (migrate_events()) { - uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); - qapi_event_send_migration_pass(generation); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch deleted file mode 100644 index 97d6597..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch +++ /dev/null @@ -1,247 +0,0 @@ -From afd8fb766af2be5cff97753b026847b91b09a30e Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:10:29 +0100 -Subject: [PATCH 27/56] migration: Move migrate_use_multifd() to options.c -MIME-Version: 
1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [26/50] f2d72eae9cc80b2402ef613e809b40aa296d2e4c (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_multifd() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 51b07548f7c31793adc178c7460c5f4369733c61) -Signed-off-by: Peter Xu ---- - migration/migration.c | 19 +++++-------------- - migration/migration.h | 1 - - migration/multifd.c | 16 ++++++++-------- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - migration/socket.c | 2 +- - 7 files changed, 25 insertions(+), 25 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 032cd5c050..e1d7f25786 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -186,7 +186,7 @@ static void migrate_fd_cancel(MigrationState *s); - - static bool migration_needs_multiple_sockets(void) - { -- return migrate_use_multifd() || migrate_postcopy_preempt(); -+ return migrate_multifd() || migrate_postcopy_preempt(); - } - - static bool uri_supports_multi_channels(const char *uri) -@@ -732,7 +732,7 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp) - static bool migration_should_start_incoming(bool main_channel) - { - /* Multifd doesn't start unless all channels are established */ -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - return migration_has_all_channels(); - } - -@@ -759,7 +759,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - uint32_t channel_magic = 0; - int ret = 0; - -- if (migrate_use_multifd() && !migrate_postcopy_ram() && -+ if (migrate_multifd() && 
!migrate_postcopy_ram() && - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { - /* - * With multiple channels, it is possible that we receive channels -@@ -798,7 +798,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - } else { - /* Multiple connections */ - assert(migration_needs_multiple_sockets()); -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - multifd_recv_new_channel(ioc, &local_err); - } else { - assert(migrate_postcopy_preempt()); -@@ -834,7 +834,7 @@ bool migration_has_all_channels(void) - return false; - } - -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - return multifd_recv_all_channels_created(); - } - -@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_use_multifd(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; --} -- - int migrate_multifd_channels(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index bd06520c19..49c0e13f41 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,7 +449,6 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --bool migrate_use_multifd(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); -diff --git a/migration/multifd.c b/migration/multifd.c -index 903df2117b..6807328189 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -516,7 +516,7 @@ void multifd_save_cleanup(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - multifd_send_terminate_threads(NULL); -@@ -587,7 +587,7 @@ int multifd_send_sync_main(QEMUFile *f) - int i; - bool flush_zero_copy; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return 0; - } - if (multifd_send_state->pages->num) { -@@ -911,7 
+911,7 @@ int multifd_save_setup(Error **errp) - uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); - uint8_t i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return 0; - } - -@@ -1016,7 +1016,7 @@ static void multifd_recv_terminate_threads(Error *err) - - void multifd_load_shutdown(void) - { -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - multifd_recv_terminate_threads(NULL); - } - } -@@ -1025,7 +1025,7 @@ void multifd_load_cleanup(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - multifd_recv_terminate_threads(NULL); -@@ -1072,7 +1072,7 @@ void multifd_recv_sync_main(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - for (i = 0; i < migrate_multifd_channels(); i++) { -@@ -1170,7 +1170,7 @@ int multifd_load_setup(Error **errp) - * Return successfully if multiFD recv state is already initialised - * or multiFD is not enabled. - */ -- if (multifd_recv_state || !migrate_use_multifd()) { -+ if (multifd_recv_state || !migrate_multifd()) { - return 0; - } - -@@ -1216,7 +1216,7 @@ bool multifd_recv_all_channels_created(void) - { - int thread_count = migrate_multifd_channels(); - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return true; - } - -diff --git a/migration/options.c b/migration/options.c -index d2219ee0e4..58673fc101 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -84,6 +84,15 @@ bool migrate_late_block_activate(void) - return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - -+bool migrate_multifd(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; -+} -+ - bool migrate_pause_before_switchover(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index b998024eba..d07269ee38 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -24,6 +24,7 @@ bool 
migrate_dirty_bitmaps(void); - bool migrate_events(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); -+bool migrate_multifd(void); - bool migrate_pause_before_switchover(void); - bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); -diff --git a/migration/ram.c b/migration/ram.c -index ee454a3849..859dd7b63f 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2362,7 +2362,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) - * if host page size == guest page size the dest guest during run may - * still see partially copied pages which is data corruption. - */ -- if (migrate_use_multifd() && !migration_in_postcopy()) { -+ if (migrate_multifd() && !migration_in_postcopy()) { - return ram_save_multifd_page(pss->pss_channel, block, offset); - } - -diff --git a/migration/socket.c b/migration/socket.c -index ebf9ac41af..f4835a256a 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -183,7 +183,7 @@ socket_start_incoming_migration_internal(SocketAddress *saddr, - - qio_net_listener_set_name(listener, "migration-socket-listener"); - -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - num = migrate_multifd_channels(); - } else if (migrate_postcopy_preempt()) { - num = RAM_CHANNEL_MAX; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch deleted file mode 100644 index b250d40..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 145b630767dbc7020ddf39b20075f4691f71321a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:25:47 +0100 -Subject: [PATCH 31/56] migration: Move migrate_use_return() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for 
postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [30/50] 5cc150188bcc61b69ea0844253597594ab18fc13 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_return_path() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 38ad1110e368bf91453c0abbd657224d57b65d47) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/rdma.c | 6 +++--- - 5 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 96f82bd165..f7facecd66 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) - return s->parameters.max_postcopy_bandwidth; - } - --bool migrate_use_return_path(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; --} -- - bool migrate_use_block_incremental(void) - { - MigrationState *s; -@@ -4175,7 +4166,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - * precopy, only if user specified "return-path" capability would - * QEMU uses the return path. 
- */ -- if (migrate_postcopy_ram() || migrate_use_return_path()) { -+ if (migrate_postcopy_ram() || migrate_return_path()) { - if (open_return_path_on_source(s, !resume)) { - error_report("Unable to open return-path for postcopy"); - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); -diff --git a/migration/migration.h b/migration/migration.h -index d4b68b08a5..24184622a8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -459,7 +459,6 @@ uint64_t migrate_xbzrle_cache_size(void); - - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); --bool migrate_use_return_path(void); - - uint64_t ram_get_total_transferred_pages(void); - -diff --git a/migration/options.c b/migration/options.c -index fe1eadeed6..2003e413da 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -147,6 +147,15 @@ bool migrate_release_ram(void) - return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - -+bool migrate_return_path(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; -+} -+ - bool migrate_validate_uuid(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index e985a5233e..316efd1063 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -31,6 +31,7 @@ bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); -+bool migrate_return_path(void); - bool migrate_validate_uuid(void); - bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); -diff --git a/migration/rdma.c b/migration/rdma.c -index f35f021963..bf55e2f163 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -3373,7 +3373,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) - * initialize the RDMAContext for return path for postcopy after first - * connection request reached. 
- */ -- if ((migrate_postcopy() || migrate_use_return_path()) -+ if ((migrate_postcopy() || migrate_return_path()) - && !rdma->is_return_path) { - rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL); - if (rdma_return_path == NULL) { -@@ -3456,7 +3456,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) - } - - /* Accept the second connection request for return path */ -- if ((migrate_postcopy() || migrate_use_return_path()) -+ if ((migrate_postcopy() || migrate_return_path()) - && !rdma->is_return_path) { - qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, - NULL, -@@ -4193,7 +4193,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - /* RDMA postcopy need a separate queue pair for return path */ -- if (migrate_postcopy() || migrate_use_return_path()) { -+ if (migrate_postcopy() || migrate_return_path()) { - rdma_return_path = qemu_rdma_data_init(host_port, errp); - - if (rdma_return_path == NULL) { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch deleted file mode 100644 index 84734af..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 2e2df63892e191e91216b8253171162f69b93387 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:41:23 +0100 -Subject: [PATCH 49/56] migration: Move migrate_use_tls() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [48/50] 314431b0f5e92d2211e58a8161f32d7b67d69e38 (peterx/qemu-kvm) - -Once there, rename it to migrate_tls() and make it return bool for -consistency. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy - ---- - -Fix typos found by fabiano - -(cherry picked from commit 10d4703be5d884bbbb6ecafe0e8bb270ad6ea937) -Signed-off-by: Peter Xu ---- - migration/migration.c | 9 --------- - migration/migration.h | 2 -- - migration/options.c | 11 ++++++++++- - migration/options.h | 1 + - migration/tls.c | 3 ++- - 5 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c2e109329d..22ef83c619 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2177,15 +2177,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --int migrate_use_tls(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.tls_creds && *s->parameters.tls_creds; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index dcf906868d..2b71df8617 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); - bool migration_in_postcopy(void); - MigrationState *migrate_get_current(void); - --int migrate_use_tls(void); -- - uint64_t ram_get_total_transferred_pages(void); - - /* Sending on the return path - generic and then for each message type */ -diff --git a/migration/options.c b/migration/options.c -index 8e8753d9be..d4c0714683 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -214,6 +214,15 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - -+bool migrate_tls(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.tls_creds && *s->parameters.tls_creds; -+} -+ - typedef enum WriteTrackingSupport { - WT_SUPPORT_UNKNOWN = 0, - WT_SUPPORT_ABSENT, -@@ -363,7 +372,7 @@ bool migrate_caps_check(bool *old_caps, bool 
*new_caps, Error **errp) - new_caps[MIGRATION_CAPABILITY_COMPRESS] || - new_caps[MIGRATION_CAPABILITY_XBZRLE] || - migrate_multifd_compression() || -- migrate_use_tls())) { -+ migrate_tls())) { - error_setg(errp, - "Zero copy only available for non-compressed non-TLS multifd migration"); - return false; -diff --git a/migration/options.h b/migration/options.h -index 1b78fa9f3d..13318a16c7 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -46,6 +46,7 @@ bool migrate_zero_copy_send(void); - */ - - bool migrate_postcopy(void); -+bool migrate_tls(void); - - /* capabilities helpers */ - -diff --git a/migration/tls.c b/migration/tls.c -index 4d2166a209..acd38e0b62 100644 ---- a/migration/tls.c -+++ b/migration/tls.c -@@ -22,6 +22,7 @@ - #include "channel.h" - #include "migration.h" - #include "tls.h" -+#include "options.h" - #include "crypto/tlscreds.h" - #include "qemu/error-report.h" - #include "qapi/error.h" -@@ -165,7 +166,7 @@ void migration_tls_channel_connect(MigrationState *s, - - bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc) - { -- if (!migrate_use_tls()) { -+ if (!migrate_tls()) { - return false; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch deleted file mode 100644 index e3a8bab..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 2184f7dae0df5fa52deba2dc884e09c6bdbc7b5f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:20:13 +0100 -Subject: [PATCH 29/56] migration: Move migrate_use_xbzrle() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [28/50] 
fc8bee0f691a96e6bd0b41f2511abe507b81fea5 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_xbzrle() -to be consistent with all other capabilities. -We change the type to return bool also for consistency. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 87dca0c9bb63014ef73ad82f7aedea1cb5a822e7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 10 +++++----- - 5 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 1d63718e88..a4ede4294e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1122,7 +1122,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); - info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); - info->xbzrle_cache->bytes = xbzrle_counters.bytes; -@@ -2604,15 +2604,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && *s->parameters.tls_creds; - } - --int migrate_use_xbzrle(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; --} -- - uint64_t migrate_xbzrle_cache_size(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index c939f82d53..e2bb5b1e2f 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -455,7 +455,6 @@ int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); - - int migrate_use_tls(void); --int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); - - bool 
migrate_use_block(void); -diff --git a/migration/options.c b/migration/options.c -index f357c99996..25264c500e 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -147,6 +147,15 @@ bool migrate_validate_uuid(void) - return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - -+bool migrate_xbzrle(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; -+} -+ - bool migrate_zero_blocks(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index ad22f4d24a..8f76a88329 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -31,6 +31,7 @@ bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); - bool migrate_validate_uuid(void); -+bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -diff --git a/migration/ram.c b/migration/ram.c -index 859dd7b63f..4576d0d849 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -156,14 +156,14 @@ static struct { - - static void XBZRLE_cache_lock(void) - { -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - qemu_mutex_lock(&XBZRLE.lock); - } - } - - static void XBZRLE_cache_unlock(void) - { -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - qemu_mutex_unlock(&XBZRLE.lock); - } - } -@@ -1137,7 +1137,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - return; - } - -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - double encoded_size, unencoded_size; - - xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - -@@ -1626,7 +1626,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) - /* Flag that we've looped */ - pss->complete_round = true; - /* After the first round, enable XBZRLE. 
*/ -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - rs->xbzrle_enabled = true; - } - } -@@ -2979,7 +2979,7 @@ static int xbzrle_init(void) - { - Error *local_err = NULL; - -- if (!migrate_use_xbzrle()) { -+ if (!migrate_xbzrle()) { - return 0; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch b/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch deleted file mode 100644 index 90031df..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 6eb252887378d639ad2e90dd426a1812d4b72ca6 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:17:14 +0100 -Subject: [PATCH 28/56] migration: Move migrate_use_zero_copy_send() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [27/50] 5a4c2b5e75c62e0f60f9c4121a2756bd140a60d9 (peterx/qemu-kvm) - -Once that we are there, we rename the function to -migrate_zero_copy_send() to be consistent with all other capabilities. - -We can remove the CONFIG_LINUX guard. We already check that we can't -setup this capability in migrate_caps_check(). 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b4bc342c766640e0cb8a0b72f71e0ee5545fb790) -Signed-off-by: Peter Xu ---- - migration/migration.c | 13 +------------ - migration/migration.h | 5 ----- - migration/multifd.c | 8 ++++---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/socket.c | 2 +- - 6 files changed, 16 insertions(+), 22 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index e1d7f25786..1d63718e88 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1609,7 +1609,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - } - - #ifdef CONFIG_LINUX -- if (migrate_use_zero_copy_send() && -+ if (migrate_zero_copy_send() && - ((params->has_multifd_compression && params->multifd_compression) || - (params->tls_creds && *params->tls_creds))) { - error_setg(errp, -@@ -2595,17 +2595,6 @@ int migrate_multifd_zstd_level(void) - return s->parameters.multifd_zstd_level; - } - --#ifdef CONFIG_LINUX --bool migrate_use_zero_copy_send(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; --} --#endif -- - int migrate_use_tls(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 49c0e13f41..c939f82d53 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -454,11 +454,6 @@ MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); - --#ifdef CONFIG_LINUX --bool migrate_use_zero_copy_send(void); --#else --#define migrate_use_zero_copy_send() (false) --#endif - int migrate_use_tls(void); - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); -diff --git a/migration/multifd.c b/migration/multifd.c -index 6807328189..cce3ad6988 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -25,7 
+25,7 @@ - #include "trace.h" - #include "multifd.h" - #include "threadinfo.h" -- -+#include "options.h" - #include "qemu/yank.h" - #include "io/channel-socket.h" - #include "yank_functions.h" -@@ -608,7 +608,7 @@ int multifd_send_sync_main(QEMUFile *f) - * all the dirty bitmaps. - */ - -- flush_zero_copy = migrate_use_zero_copy_send(); -+ flush_zero_copy = migrate_zero_copy_send(); - - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; -@@ -653,7 +653,7 @@ static void *multifd_send_thread(void *opaque) - MigrationThread *thread = NULL; - Error *local_err = NULL; - int ret = 0; -- bool use_zero_copy_send = migrate_use_zero_copy_send(); -+ bool use_zero_copy_send = migrate_zero_copy_send(); - - thread = MigrationThreadAdd(p->name, qemu_get_thread_id()); - -@@ -945,7 +945,7 @@ int multifd_save_setup(Error **errp) - p->page_size = qemu_target_page_size(); - p->page_count = page_count; - -- if (migrate_use_zero_copy_send()) { -+ if (migrate_zero_copy_send()) { - p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; - } else { - p->write_flags = 0; -diff --git a/migration/options.c b/migration/options.c -index 58673fc101..f357c99996 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -155,3 +155,12 @@ bool migrate_zero_blocks(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } -+ -+bool migrate_zero_copy_send(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; -+} -diff --git a/migration/options.h b/migration/options.h -index d07269ee38..ad22f4d24a 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -32,5 +32,6 @@ bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); - bool migrate_validate_uuid(void); - bool migrate_zero_blocks(void); -+bool migrate_zero_copy_send(void); - - #endif -diff --git a/migration/socket.c b/migration/socket.c -index f4835a256a..1b6f5baefb 100644 ---- 
a/migration/socket.c -+++ b/migration/socket.c -@@ -98,7 +98,7 @@ static void socket_outgoing_migration(QIOTask *task, - - trace_migration_socket_outgoing_connected(data->hostname); - -- if (migrate_use_zero_copy_send() && -+ if (migrate_zero_copy_send() && - !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { - error_setg(&err, "Zero copy send feature not detected in host kernel"); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch b/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch deleted file mode 100644 index 145b510..0000000 --- a/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch +++ /dev/null @@ -1,409 +0,0 @@ -From 0911e025a9dc8a0c85944ac11fb9df72e5ad0677 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 09/37] migration: Move migration_properties to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/28] ff07358afa0c90f13125b177b0e08c74ef1b9905 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit f9436522c8dd -Author: Juan Quintela -Date: Thu Mar 2 12:55:57 2023 +0100 - - migration: Move migration_properties to options.c - - Signed-off-by: Juan Quintela - Reviewed-by: Vladimir Sementsov-Ogievskiy - -Signed-off-by: Cédric Le Goater ---- - migration/migration.c | 157 ------------------------------------------ - migration/options.c | 155 +++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 7 ++ - 3 files changed, 162 insertions(+), 157 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 08f87f2b0e..1ac5f19bc2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -52,8 +52,6 @@ 
- #include "io/channel-tls.h" - #include "migration/colo.h" - #include "hw/boards.h" --#include "hw/qdev-properties.h" --#include "hw/qdev-properties-system.h" - #include "monitor/monitor.h" - #include "net/announce.h" - #include "qemu/queue.h" -@@ -65,51 +63,6 @@ - #include "sysemu/qtest.h" - #include "options.h" - --#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ -- --/* Time in milliseconds we are allowed to stop the source, -- * for sending the last part */ --#define DEFAULT_MIGRATE_SET_DOWNTIME 300 -- --/* Default compression thread count */ --#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 --/* Default decompression thread count, usually decompression is at -- * least 4 times as fast as compression.*/ --#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 --/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ --#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 --/* Define default autoconverge cpu throttle migration parameters */ --#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 --#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 --#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 --#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 -- --/* Migration XBZRLE default cache size */ --#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) -- --/* The delay time (in ms) between two COLO checkpoints */ --#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) --#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 --#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE --/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ --#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 --/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ --#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 -- --/* Background transfer rate for postcopy, 0 means unlimited, note -- * that page requests can still exceed this limit. 
-- */ --#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 -- --/* -- * Parameters for self_announce_delay giving a stream of RARP/ARP -- * packets after migration. -- */ --#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 --#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 --#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 --#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 -- - static NotifierList migration_state_notifiers = - NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); - -@@ -3336,116 +3289,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - s->migration_thread_running = true; - } - --#define DEFINE_PROP_MIG_CAP(name, x) \ -- DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) -- --static Property migration_properties[] = { -- DEFINE_PROP_BOOL("store-global-state", MigrationState, -- store_global_state, true), -- DEFINE_PROP_BOOL("send-configuration", MigrationState, -- send_configuration, true), -- DEFINE_PROP_BOOL("send-section-footer", MigrationState, -- send_section_footer, true), -- DEFINE_PROP_BOOL("decompress-error-check", MigrationState, -- decompress_error_check, true), -- DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, -- clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), -- DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -- preempt_pre_7_2, false), -- -- /* Migration parameters */ -- DEFINE_PROP_UINT8("x-compress-level", MigrationState, -- parameters.compress_level, -- DEFAULT_MIGRATE_COMPRESS_LEVEL), -- DEFINE_PROP_UINT8("x-compress-threads", MigrationState, -- parameters.compress_threads, -- DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), -- DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, -- parameters.compress_wait_thread, true), -- DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, -- parameters.decompress_threads, -- DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), -- DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, -- parameters.throttle_trigger_threshold, -- DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), -- 
DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, -- parameters.cpu_throttle_initial, -- DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), -- DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, -- parameters.cpu_throttle_increment, -- DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), -- DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, -- parameters.cpu_throttle_tailslow, false), -- DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, -- parameters.max_bandwidth, MAX_THROTTLE), -- DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, -- parameters.downtime_limit, -- DEFAULT_MIGRATE_SET_DOWNTIME), -- DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, -- parameters.x_checkpoint_delay, -- DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), -- DEFINE_PROP_UINT8("multifd-channels", MigrationState, -- parameters.multifd_channels, -- DEFAULT_MIGRATE_MULTIFD_CHANNELS), -- DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, -- parameters.multifd_compression, -- DEFAULT_MIGRATE_MULTIFD_COMPRESSION), -- DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, -- parameters.multifd_zlib_level, -- DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), -- DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, -- parameters.multifd_zstd_level, -- DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -- DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, -- parameters.xbzrle_cache_size, -- DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -- DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, -- parameters.max_postcopy_bandwidth, -- DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), -- DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, -- parameters.max_cpu_throttle, -- DEFAULT_MIGRATE_MAX_CPU_THROTTLE), -- DEFINE_PROP_SIZE("announce-initial", MigrationState, -- parameters.announce_initial, -- DEFAULT_MIGRATE_ANNOUNCE_INITIAL), -- DEFINE_PROP_SIZE("announce-max", MigrationState, -- parameters.announce_max, -- DEFAULT_MIGRATE_ANNOUNCE_MAX), -- DEFINE_PROP_SIZE("announce-rounds", MigrationState, -- 
parameters.announce_rounds, -- DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), -- DEFINE_PROP_SIZE("announce-step", MigrationState, -- parameters.announce_step, -- DEFAULT_MIGRATE_ANNOUNCE_STEP), -- DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), -- DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), -- DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), -- -- /* Migration capabilities */ -- DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), -- DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), -- DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), -- DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), -- DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), -- DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), -- DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), -- DEFINE_PROP_MIG_CAP("x-postcopy-preempt", -- MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), -- DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), -- DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), -- DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), -- DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), -- DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), -- DEFINE_PROP_MIG_CAP("x-background-snapshot", -- MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), --#ifdef CONFIG_LINUX -- DEFINE_PROP_MIG_CAP("x-zero-copy-send", -- MIGRATION_CAPABILITY_ZERO_COPY_SEND), --#endif -- -- DEFINE_PROP_END_OF_LIST(), --}; -- - static void migration_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -diff --git a/migration/options.c b/migration/options.c -index bcfe244fa9..a76984276d 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -31,6 +31,161 @@ - #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 - #define MAX_MIGRATE_DOWNTIME 
(MAX_MIGRATE_DOWNTIME_SECONDS * 1000) - -+#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ -+ -+/* Time in milliseconds we are allowed to stop the source, -+ * for sending the last part */ -+#define DEFAULT_MIGRATE_SET_DOWNTIME 300 -+ -+/* Default compression thread count */ -+#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 -+/* Default decompression thread count, usually decompression is at -+ * least 4 times as fast as compression.*/ -+#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 -+/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ -+#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 -+/* Define default autoconverge cpu throttle migration parameters */ -+#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 -+#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 -+#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 -+#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 -+ -+/* Migration XBZRLE default cache size */ -+#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) -+ -+/* The delay time (in ms) between two COLO checkpoints */ -+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) -+#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 -+#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE -+/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ -+#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 -+/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ -+#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 -+ -+/* Background transfer rate for postcopy, 0 means unlimited, note -+ * that page requests can still exceed this limit. -+ */ -+#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 -+ -+/* -+ * Parameters for self_announce_delay giving a stream of RARP/ARP -+ * packets after migration. 
-+ */ -+#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 -+#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 -+#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 -+#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 -+ -+#define DEFINE_PROP_MIG_CAP(name, x) \ -+ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) -+ -+Property migration_properties[] = { -+ DEFINE_PROP_BOOL("store-global-state", MigrationState, -+ store_global_state, true), -+ DEFINE_PROP_BOOL("send-configuration", MigrationState, -+ send_configuration, true), -+ DEFINE_PROP_BOOL("send-section-footer", MigrationState, -+ send_section_footer, true), -+ DEFINE_PROP_BOOL("decompress-error-check", MigrationState, -+ decompress_error_check, true), -+ DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, -+ clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), -+ DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -+ preempt_pre_7_2, false), -+ -+ /* Migration parameters */ -+ DEFINE_PROP_UINT8("x-compress-level", MigrationState, -+ parameters.compress_level, -+ DEFAULT_MIGRATE_COMPRESS_LEVEL), -+ DEFINE_PROP_UINT8("x-compress-threads", MigrationState, -+ parameters.compress_threads, -+ DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), -+ DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, -+ parameters.compress_wait_thread, true), -+ DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, -+ parameters.decompress_threads, -+ DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), -+ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, -+ parameters.throttle_trigger_threshold, -+ DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), -+ DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, -+ parameters.cpu_throttle_initial, -+ DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), -+ DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, -+ parameters.cpu_throttle_increment, -+ DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), -+ DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, -+ parameters.cpu_throttle_tailslow, false), -+ 
DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, -+ parameters.max_bandwidth, MAX_THROTTLE), -+ DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, -+ parameters.downtime_limit, -+ DEFAULT_MIGRATE_SET_DOWNTIME), -+ DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, -+ parameters.x_checkpoint_delay, -+ DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), -+ DEFINE_PROP_UINT8("multifd-channels", MigrationState, -+ parameters.multifd_channels, -+ DEFAULT_MIGRATE_MULTIFD_CHANNELS), -+ DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, -+ parameters.multifd_compression, -+ DEFAULT_MIGRATE_MULTIFD_COMPRESSION), -+ DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, -+ parameters.multifd_zlib_level, -+ DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), -+ DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, -+ parameters.multifd_zstd_level, -+ DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -+ DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, -+ parameters.xbzrle_cache_size, -+ DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -+ DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, -+ parameters.max_postcopy_bandwidth, -+ DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), -+ DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, -+ parameters.max_cpu_throttle, -+ DEFAULT_MIGRATE_MAX_CPU_THROTTLE), -+ DEFINE_PROP_SIZE("announce-initial", MigrationState, -+ parameters.announce_initial, -+ DEFAULT_MIGRATE_ANNOUNCE_INITIAL), -+ DEFINE_PROP_SIZE("announce-max", MigrationState, -+ parameters.announce_max, -+ DEFAULT_MIGRATE_ANNOUNCE_MAX), -+ DEFINE_PROP_SIZE("announce-rounds", MigrationState, -+ parameters.announce_rounds, -+ DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), -+ DEFINE_PROP_SIZE("announce-step", MigrationState, -+ parameters.announce_step, -+ DEFAULT_MIGRATE_ANNOUNCE_STEP), -+ DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), -+ DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), -+ DEFINE_PROP_STRING("tls-authz", MigrationState, 
parameters.tls_authz), -+ -+ /* Migration capabilities */ -+ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), -+ DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), -+ DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), -+ DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), -+ DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), -+ DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), -+ DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), -+ DEFINE_PROP_MIG_CAP("x-postcopy-preempt", -+ MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), -+ DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), -+ DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), -+ DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), -+ DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), -+ DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), -+ DEFINE_PROP_MIG_CAP("x-background-snapshot", -+ MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), -+#ifdef CONFIG_LINUX -+ DEFINE_PROP_MIG_CAP("x-zero-copy-send", -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND), -+#endif -+ -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ - bool migrate_auto_converge(void) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.h b/migration/options.h -index 89067e59a0..7b0f7245ad 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -14,6 +14,9 @@ - #ifndef QEMU_MIGRATION_OPTIONS_H - #define QEMU_MIGRATION_OPTIONS_H - -+#include "hw/qdev-properties.h" -+#include "hw/qdev-properties-system.h" -+ - /* constants */ - - /* Amount of time to allocate to each "chunk" of bandwidth-throttled -@@ -21,6 +24,10 @@ - #define BUFFER_DELAY 100 - #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) - -+/* migration properties */ -+ -+extern Property migration_properties[]; -+ - /* capabilities */ - - bool migrate_auto_converge(void); --- -2.39.3 - diff --git 
a/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch b/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch deleted file mode 100644 index ad4510b..0000000 --- a/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch +++ /dev/null @@ -1,317 +0,0 @@ -From d5ea4c82c44a59ac70313eb1eac77999ca5fde36 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:39:03 +0100 -Subject: [PATCH 37/56] migration: Move parameters functions to option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [36/50] 2540921028025504723e762c0a1d2f295ac5a6d1 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 1dfc4b9e19bcf1ad41a1be9ac82db35b9647c3c1) -Signed-off-by: Peter Xu ---- - migration/migration.c | 91 --------------------------------------- - migration/migration.h | 11 ----- - migration/multifd-zlib.c | 1 + - migration/multifd-zstd.c | 1 + - migration/options.c | 93 ++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 13 ++++++ - 6 files changed, 108 insertions(+), 102 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 880a51210e..7f2e770deb 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2264,79 +2264,6 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --int migrate_compress_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.compress_level; --} -- --int migrate_compress_threads(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.compress_threads; --} -- --int migrate_compress_wait_thread(void) --{ 
-- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.compress_wait_thread; --} -- --int migrate_decompress_threads(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.decompress_threads; --} -- --int migrate_multifd_channels(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_channels; --} -- --MultiFDCompression migrate_multifd_compression(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); -- return s->parameters.multifd_compression; --} -- --int migrate_multifd_zlib_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_zlib_level; --} -- --int migrate_multifd_zstd_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_zstd_level; --} -- - int migrate_use_tls(void) - { - MigrationState *s; -@@ -2346,24 +2273,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && *s->parameters.tls_creds; - } - --uint64_t migrate_xbzrle_cache_size(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.xbzrle_cache_size; --} -- --static int64_t migrate_max_postcopy_bandwidth(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.max_postcopy_bandwidth; --} -- - bool migrate_use_block_incremental(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 24184622a8..8451e5f2fe 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,24 +449,13 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --int migrate_multifd_channels(void); --MultiFDCompression migrate_multifd_compression(void); --int migrate_multifd_zlib_level(void); --int migrate_multifd_zstd_level(void); -- - int migrate_use_tls(void); --uint64_t 
migrate_xbzrle_cache_size(void); - - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - - uint64_t ram_get_total_transferred_pages(void); - --int migrate_compress_level(void); --int migrate_compress_threads(void); --int migrate_compress_wait_thread(void); --int migrate_decompress_threads(void); -- - /* Sending on the return path - generic and then for each message type */ - void migrate_send_rp_shut(MigrationIncomingState *mis, - uint32_t value); -diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c -index 37770248e1..81701250ad 100644 ---- a/migration/multifd-zlib.c -+++ b/migration/multifd-zlib.c -@@ -18,6 +18,7 @@ - #include "qapi/error.h" - #include "migration.h" - #include "trace.h" -+#include "options.h" - #include "multifd.h" - - struct zlib_data { -diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c -index f4a8e1ed1f..d1d29e76cc 100644 ---- a/migration/multifd-zstd.c -+++ b/migration/multifd-zstd.c -@@ -18,6 +18,7 @@ - #include "qapi/error.h" - #include "migration.h" - #include "trace.h" -+#include "options.h" - #include "multifd.h" - - struct zstd_data { -diff --git a/migration/options.c b/migration/options.c -index f3b2d6e482..8d15be858c 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -460,3 +460,96 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - s->capabilities[cap->value->capability] = cap->value->state; - } - } -+ -+/* parameters */ -+ -+int migrate_compress_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_level; -+} -+ -+int migrate_compress_threads(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_threads; -+} -+ -+int migrate_compress_wait_thread(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_wait_thread; -+} -+ -+int migrate_decompress_threads(void) -+{ -+ MigrationState *s; -+ -+ s = 
migrate_get_current(); -+ -+ return s->parameters.decompress_threads; -+} -+ -+int64_t migrate_max_postcopy_bandwidth(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_postcopy_bandwidth; -+} -+ -+int migrate_multifd_channels(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_channels; -+} -+ -+MultiFDCompression migrate_multifd_compression(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); -+ return s->parameters.multifd_compression; -+} -+ -+int migrate_multifd_zlib_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_zlib_level; -+} -+ -+int migrate_multifd_zstd_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_zstd_level; -+} -+ -+uint64_t migrate_xbzrle_cache_size(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.xbzrle_cache_size; -+} -diff --git a/migration/options.h b/migration/options.h -index 5979e4ff90..b24ee92283 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -43,4 +43,17 @@ bool migrate_zero_copy_send(void); - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); - bool migrate_cap_set(int cap, bool value, Error **errp); - -+/* parameters */ -+ -+int migrate_compress_level(void); -+int migrate_compress_threads(void); -+int migrate_compress_wait_thread(void); -+int migrate_decompress_threads(void); -+int64_t migrate_max_postcopy_bandwidth(void); -+int migrate_multifd_channels(void); -+MultiFDCompression migrate_multifd_compression(void); -+int migrate_multifd_zlib_level(void); -+int migrate_multifd_zstd_level(void); -+uint64_t migrate_xbzrle_cache_size(void); -+ - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch 
b/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch deleted file mode 100644 index 10f185b..0000000 --- a/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch +++ /dev/null @@ -1,100 +0,0 @@ -From d967ec22cdb20e0a846f050a2bc7bd4caa87940d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:18:02 +0100 -Subject: [PATCH 35/56] migration: Move qmp_migrate_set_capabilities() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [34/50] 16b62ca7e06c58d71389c449dc19c11939dd0882 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 45c1de13f09b1fd4ea26f54e6da12aae52f34cb8) -Signed-off-by: Peter Xu ---- - migration/migration.c | 26 -------------------------- - migration/options.c | 26 ++++++++++++++++++++++++++ - 2 files changed, 26 insertions(+), 26 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 3dc8ee4875..369cd91796 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1222,32 +1222,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) - return info; - } - --void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, -- Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- MigrationCapabilityStatusList *cap; -- bool new_caps[MIGRATION_CAPABILITY__MAX]; -- -- if (migration_is_running(s->state)) { -- error_setg(errp, QERR_MIGRATION_ACTIVE); -- return; -- } -- -- memcpy(new_caps, s->capabilities, sizeof(new_caps)); -- for (cap = params; cap; cap = cap->next) { -- new_caps[cap->value->capability] = cap->value->state; -- } -- -- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -- return; 
-- } -- -- for (cap = params; cap; cap = cap->next) { -- s->capabilities[cap->value->capability] = cap->value->state; -- } --} -- - /* - * Check whether the parameters are valid. Error will be put into errp - * (if provided). Return true if valid, otherwise false. -diff --git a/migration/options.c b/migration/options.c -index ff621bdeb3..4cbe77e35a 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -413,3 +413,29 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - - return head; - } -+ -+void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, -+ Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ MigrationCapabilityStatusList *cap; -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; -+ -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return; -+ } -+ -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ for (cap = params; cap; cap = cap->next) { -+ new_caps[cap->value->capability] = cap->value->state; -+ } -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return; -+ } -+ -+ for (cap = params; cap; cap = cap->next) { -+ s->capabilities[cap->value->capability] = cap->value->state; -+ } -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch b/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch deleted file mode 100644 index 3685a33..0000000 --- a/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch +++ /dev/null @@ -1,943 +0,0 @@ -From 944bf4759d1279c342ddd29c47d47c9670b64625 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:13:16 +0100 -Subject: [PATCH 50/56] migration: Move qmp_migrate_set_parameters() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 
2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [49/50] b55f7afe868e117d4212f1518b9a37514cc99b33 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 09d6c9658474e8573c5ada58dca8b20fe47dd99e) -Signed-off-by: Peter Xu ---- - migration/migration.c | 420 ------------------------------------------ - migration/options.c | 418 +++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 11 ++ - 3 files changed, 429 insertions(+), 420 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 22ef83c619..08f87f2b0e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -67,19 +67,10 @@ - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - --/* Amount of time to allocate to each "chunk" of bandwidth-throttled -- * data. */ --#define BUFFER_DELAY 100 --#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) -- - /* Time in milliseconds we are allowed to stop the source, - * for sending the last part */ - #define DEFAULT_MIGRATE_SET_DOWNTIME 300 - --/* Maximum migrate downtime set to 2000 seconds */ --#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 --#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) -- - /* Default compression thread count */ - #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 - /* Default decompression thread count, usually decompression is at -@@ -1140,417 +1131,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) - return info; - } - --/* -- * Check whether the parameters are valid. Error will be put into errp -- * (if provided). Return true if valid, otherwise false. 
-- */ --static bool migrate_params_check(MigrationParameters *params, Error **errp) --{ -- if (params->has_compress_level && -- (params->compress_level > 9)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", -- "a value between 0 and 9"); -- return false; -- } -- -- if (params->has_compress_threads && (params->compress_threads < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "compress_threads", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_decompress_threads && (params->decompress_threads < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "decompress_threads", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_throttle_trigger_threshold && -- (params->throttle_trigger_threshold < 1 || -- params->throttle_trigger_threshold > 100)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "throttle_trigger_threshold", -- "an integer in the range of 1 to 100"); -- return false; -- } -- -- if (params->has_cpu_throttle_initial && -- (params->cpu_throttle_initial < 1 || -- params->cpu_throttle_initial > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "cpu_throttle_initial", -- "an integer in the range of 1 to 99"); -- return false; -- } -- -- if (params->has_cpu_throttle_increment && -- (params->cpu_throttle_increment < 1 || -- params->cpu_throttle_increment > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "cpu_throttle_increment", -- "an integer in the range of 1 to 99"); -- return false; -- } -- -- if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "max_bandwidth", -- "an integer in the range of 0 to "stringify(SIZE_MAX) -- " bytes/second"); -- return false; -- } -- -- if (params->has_downtime_limit && -- (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "downtime_limit", -- "an integer in the range of 0 to " -- 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); -- return false; -- } -- -- /* x_checkpoint_delay is now always positive */ -- -- if (params->has_multifd_channels && (params->multifd_channels < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "multifd_channels", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_multifd_zlib_level && -- (params->multifd_zlib_level > 9)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", -- "a value between 0 and 9"); -- return false; -- } -- -- if (params->has_multifd_zstd_level && -- (params->multifd_zstd_level > 20)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", -- "a value between 0 and 20"); -- return false; -- } -- -- if (params->has_xbzrle_cache_size && -- (params->xbzrle_cache_size < qemu_target_page_size() || -- !is_power_of_2(params->xbzrle_cache_size))) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "xbzrle_cache_size", -- "a power of two no less than the target page size"); -- return false; -- } -- -- if (params->has_max_cpu_throttle && -- (params->max_cpu_throttle < params->cpu_throttle_initial || -- params->max_cpu_throttle > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "max_cpu_throttle", -- "an integer in the range of cpu_throttle_initial to 99"); -- return false; -- } -- -- if (params->has_announce_initial && -- params->announce_initial > 100000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_initial", -- "a value between 0 and 100000"); -- return false; -- } -- if (params->has_announce_max && -- params->announce_max > 100000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_max", -- "a value between 0 and 100000"); -- return false; -- } -- if (params->has_announce_rounds && -- params->announce_rounds > 1000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_rounds", -- "a value between 0 and 1000"); -- return false; -- } -- if (params->has_announce_step && -- 
(params->announce_step < 1 || -- params->announce_step > 10000)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_step", -- "a value between 0 and 10000"); -- return false; -- } -- -- if (params->has_block_bitmap_mapping && -- !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { -- error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); -- return false; -- } -- --#ifdef CONFIG_LINUX -- if (migrate_zero_copy_send() && -- ((params->has_multifd_compression && params->multifd_compression) || -- (params->tls_creds && *params->tls_creds))) { -- error_setg(errp, -- "Zero copy only available for non-compressed non-TLS multifd migration"); -- return false; -- } --#endif -- -- return true; --} -- --static void migrate_params_test_apply(MigrateSetParameters *params, -- MigrationParameters *dest) --{ -- *dest = migrate_get_current()->parameters; -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- -- if (params->has_compress_level) { -- dest->compress_level = params->compress_level; -- } -- -- if (params->has_compress_threads) { -- dest->compress_threads = params->compress_threads; -- } -- -- if (params->has_compress_wait_thread) { -- dest->compress_wait_thread = params->compress_wait_thread; -- } -- -- if (params->has_decompress_threads) { -- dest->decompress_threads = params->decompress_threads; -- } -- -- if (params->has_throttle_trigger_threshold) { -- dest->throttle_trigger_threshold = params->throttle_trigger_threshold; -- } -- -- if (params->has_cpu_throttle_initial) { -- dest->cpu_throttle_initial = params->cpu_throttle_initial; -- } -- -- if (params->has_cpu_throttle_increment) { -- dest->cpu_throttle_increment = params->cpu_throttle_increment; -- } -- -- if (params->has_cpu_throttle_tailslow) { -- dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; -- } -- -- if (params->tls_creds) { -- assert(params->tls_creds->type == QTYPE_QSTRING); -- dest->tls_creds = params->tls_creds->u.s; -- } -- 
-- if (params->tls_hostname) { -- assert(params->tls_hostname->type == QTYPE_QSTRING); -- dest->tls_hostname = params->tls_hostname->u.s; -- } -- -- if (params->has_max_bandwidth) { -- dest->max_bandwidth = params->max_bandwidth; -- } -- -- if (params->has_downtime_limit) { -- dest->downtime_limit = params->downtime_limit; -- } -- -- if (params->has_x_checkpoint_delay) { -- dest->x_checkpoint_delay = params->x_checkpoint_delay; -- } -- -- if (params->has_block_incremental) { -- dest->block_incremental = params->block_incremental; -- } -- if (params->has_multifd_channels) { -- dest->multifd_channels = params->multifd_channels; -- } -- if (params->has_multifd_compression) { -- dest->multifd_compression = params->multifd_compression; -- } -- if (params->has_xbzrle_cache_size) { -- dest->xbzrle_cache_size = params->xbzrle_cache_size; -- } -- if (params->has_max_postcopy_bandwidth) { -- dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; -- } -- if (params->has_max_cpu_throttle) { -- dest->max_cpu_throttle = params->max_cpu_throttle; -- } -- if (params->has_announce_initial) { -- dest->announce_initial = params->announce_initial; -- } -- if (params->has_announce_max) { -- dest->announce_max = params->announce_max; -- } -- if (params->has_announce_rounds) { -- dest->announce_rounds = params->announce_rounds; -- } -- if (params->has_announce_step) { -- dest->announce_step = params->announce_step; -- } -- -- if (params->has_block_bitmap_mapping) { -- dest->has_block_bitmap_mapping = true; -- dest->block_bitmap_mapping = params->block_bitmap_mapping; -- } --} -- --static void migrate_params_apply(MigrateSetParameters *params, Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- -- if (params->has_compress_level) { -- s->parameters.compress_level = params->compress_level; -- } -- -- if (params->has_compress_threads) { -- s->parameters.compress_threads = params->compress_threads; -- } 
-- -- if (params->has_compress_wait_thread) { -- s->parameters.compress_wait_thread = params->compress_wait_thread; -- } -- -- if (params->has_decompress_threads) { -- s->parameters.decompress_threads = params->decompress_threads; -- } -- -- if (params->has_throttle_trigger_threshold) { -- s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; -- } -- -- if (params->has_cpu_throttle_initial) { -- s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; -- } -- -- if (params->has_cpu_throttle_increment) { -- s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; -- } -- -- if (params->has_cpu_throttle_tailslow) { -- s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; -- } -- -- if (params->tls_creds) { -- g_free(s->parameters.tls_creds); -- assert(params->tls_creds->type == QTYPE_QSTRING); -- s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); -- } -- -- if (params->tls_hostname) { -- g_free(s->parameters.tls_hostname); -- assert(params->tls_hostname->type == QTYPE_QSTRING); -- s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); -- } -- -- if (params->tls_authz) { -- g_free(s->parameters.tls_authz); -- assert(params->tls_authz->type == QTYPE_QSTRING); -- s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); -- } -- -- if (params->has_max_bandwidth) { -- s->parameters.max_bandwidth = params->max_bandwidth; -- if (s->to_dst_file && !migration_in_postcopy()) { -- qemu_file_set_rate_limit(s->to_dst_file, -- s->parameters.max_bandwidth / XFER_LIMIT_RATIO); -- } -- } -- -- if (params->has_downtime_limit) { -- s->parameters.downtime_limit = params->downtime_limit; -- } -- -- if (params->has_x_checkpoint_delay) { -- s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; -- if (migration_in_colo_state()) { -- colo_checkpoint_notify(s); -- } -- } -- -- if (params->has_block_incremental) { -- s->parameters.block_incremental = params->block_incremental; -- } -- if 
(params->has_multifd_channels) { -- s->parameters.multifd_channels = params->multifd_channels; -- } -- if (params->has_multifd_compression) { -- s->parameters.multifd_compression = params->multifd_compression; -- } -- if (params->has_xbzrle_cache_size) { -- s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; -- xbzrle_cache_resize(params->xbzrle_cache_size, errp); -- } -- if (params->has_max_postcopy_bandwidth) { -- s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; -- if (s->to_dst_file && migration_in_postcopy()) { -- qemu_file_set_rate_limit(s->to_dst_file, -- s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); -- } -- } -- if (params->has_max_cpu_throttle) { -- s->parameters.max_cpu_throttle = params->max_cpu_throttle; -- } -- if (params->has_announce_initial) { -- s->parameters.announce_initial = params->announce_initial; -- } -- if (params->has_announce_max) { -- s->parameters.announce_max = params->announce_max; -- } -- if (params->has_announce_rounds) { -- s->parameters.announce_rounds = params->announce_rounds; -- } -- if (params->has_announce_step) { -- s->parameters.announce_step = params->announce_step; -- } -- -- if (params->has_block_bitmap_mapping) { -- qapi_free_BitmapMigrationNodeAliasList( -- s->parameters.block_bitmap_mapping); -- -- s->parameters.has_block_bitmap_mapping = true; -- s->parameters.block_bitmap_mapping = -- QAPI_CLONE(BitmapMigrationNodeAliasList, -- params->block_bitmap_mapping); -- } --} -- --void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) --{ -- MigrationParameters tmp; -- -- /* TODO Rewrite "" to null instead */ -- if (params->tls_creds -- && params->tls_creds->type == QTYPE_QNULL) { -- qobject_unref(params->tls_creds->u.n); -- params->tls_creds->type = QTYPE_QSTRING; -- params->tls_creds->u.s = strdup(""); -- } -- /* TODO Rewrite "" to null instead */ -- if (params->tls_hostname -- && params->tls_hostname->type == QTYPE_QNULL) { -- 
qobject_unref(params->tls_hostname->u.n); -- params->tls_hostname->type = QTYPE_QSTRING; -- params->tls_hostname->u.s = strdup(""); -- } -- -- migrate_params_test_apply(params, &tmp); -- -- if (!migrate_params_check(&tmp, errp)) { -- /* Invalid parameter */ -- return; -- } -- -- migrate_params_apply(params, errp); --} -- -- - void qmp_migrate_start_postcopy(Error **errp) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.c b/migration/options.c -index d4c0714683..4701c75a4d 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,17 +12,25 @@ - */ - - #include "qemu/osdep.h" -+#include "exec/target_page.h" - #include "qapi/clone-visitor.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" - #include "qapi/qapi-visit-migration.h" - #include "qapi/qmp/qerror.h" -+#include "qapi/qmp/qnull.h" - #include "sysemu/runstate.h" -+#include "migration/colo.h" - #include "migration/misc.h" - #include "migration.h" -+#include "qemu-file.h" - #include "ram.h" - #include "options.h" - -+/* Maximum migrate downtime set to 2000 seconds */ -+#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 -+#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) -+ - bool migrate_auto_converge(void) - { - MigrationState *s; -@@ -729,3 +737,413 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - - return params; - } -+ -+/* -+ * Check whether the parameters are valid. Error will be put into errp -+ * (if provided). Return true if valid, otherwise false. 
-+ */ -+bool migrate_params_check(MigrationParameters *params, Error **errp) -+{ -+ if (params->has_compress_level && -+ (params->compress_level > 9)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", -+ "a value between 0 and 9"); -+ return false; -+ } -+ -+ if (params->has_compress_threads && (params->compress_threads < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "compress_threads", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_decompress_threads && (params->decompress_threads < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "decompress_threads", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_throttle_trigger_threshold && -+ (params->throttle_trigger_threshold < 1 || -+ params->throttle_trigger_threshold > 100)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "throttle_trigger_threshold", -+ "an integer in the range of 1 to 100"); -+ return false; -+ } -+ -+ if (params->has_cpu_throttle_initial && -+ (params->cpu_throttle_initial < 1 || -+ params->cpu_throttle_initial > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "cpu_throttle_initial", -+ "an integer in the range of 1 to 99"); -+ return false; -+ } -+ -+ if (params->has_cpu_throttle_increment && -+ (params->cpu_throttle_increment < 1 || -+ params->cpu_throttle_increment > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "cpu_throttle_increment", -+ "an integer in the range of 1 to 99"); -+ return false; -+ } -+ -+ if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "max_bandwidth", -+ "an integer in the range of 0 to "stringify(SIZE_MAX) -+ " bytes/second"); -+ return false; -+ } -+ -+ if (params->has_downtime_limit && -+ (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "downtime_limit", -+ "an integer in the range of 0 to " -+ 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); -+ return false; -+ } -+ -+ /* x_checkpoint_delay is now always positive */ -+ -+ if (params->has_multifd_channels && (params->multifd_channels < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "multifd_channels", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_multifd_zlib_level && -+ (params->multifd_zlib_level > 9)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", -+ "a value between 0 and 9"); -+ return false; -+ } -+ -+ if (params->has_multifd_zstd_level && -+ (params->multifd_zstd_level > 20)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", -+ "a value between 0 and 20"); -+ return false; -+ } -+ -+ if (params->has_xbzrle_cache_size && -+ (params->xbzrle_cache_size < qemu_target_page_size() || -+ !is_power_of_2(params->xbzrle_cache_size))) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "xbzrle_cache_size", -+ "a power of two no less than the target page size"); -+ return false; -+ } -+ -+ if (params->has_max_cpu_throttle && -+ (params->max_cpu_throttle < params->cpu_throttle_initial || -+ params->max_cpu_throttle > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "max_cpu_throttle", -+ "an integer in the range of cpu_throttle_initial to 99"); -+ return false; -+ } -+ -+ if (params->has_announce_initial && -+ params->announce_initial > 100000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_initial", -+ "a value between 0 and 100000"); -+ return false; -+ } -+ if (params->has_announce_max && -+ params->announce_max > 100000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_max", -+ "a value between 0 and 100000"); -+ return false; -+ } -+ if (params->has_announce_rounds && -+ params->announce_rounds > 1000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_rounds", -+ "a value between 0 and 1000"); -+ return false; -+ } -+ if (params->has_announce_step && -+ 
(params->announce_step < 1 || -+ params->announce_step > 10000)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_step", -+ "a value between 0 and 10000"); -+ return false; -+ } -+ -+ if (params->has_block_bitmap_mapping && -+ !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { -+ error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); -+ return false; -+ } -+ -+#ifdef CONFIG_LINUX -+ if (migrate_zero_copy_send() && -+ ((params->has_multifd_compression && params->multifd_compression) || -+ (params->tls_creds && *params->tls_creds))) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#endif -+ -+ return true; -+} -+ -+static void migrate_params_test_apply(MigrateSetParameters *params, -+ MigrationParameters *dest) -+{ -+ *dest = migrate_get_current()->parameters; -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ -+ if (params->has_compress_level) { -+ dest->compress_level = params->compress_level; -+ } -+ -+ if (params->has_compress_threads) { -+ dest->compress_threads = params->compress_threads; -+ } -+ -+ if (params->has_compress_wait_thread) { -+ dest->compress_wait_thread = params->compress_wait_thread; -+ } -+ -+ if (params->has_decompress_threads) { -+ dest->decompress_threads = params->decompress_threads; -+ } -+ -+ if (params->has_throttle_trigger_threshold) { -+ dest->throttle_trigger_threshold = params->throttle_trigger_threshold; -+ } -+ -+ if (params->has_cpu_throttle_initial) { -+ dest->cpu_throttle_initial = params->cpu_throttle_initial; -+ } -+ -+ if (params->has_cpu_throttle_increment) { -+ dest->cpu_throttle_increment = params->cpu_throttle_increment; -+ } -+ -+ if (params->has_cpu_throttle_tailslow) { -+ dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; -+ } -+ -+ if (params->tls_creds) { -+ assert(params->tls_creds->type == QTYPE_QSTRING); -+ dest->tls_creds = params->tls_creds->u.s; -+ } -+ 
-+ if (params->tls_hostname) { -+ assert(params->tls_hostname->type == QTYPE_QSTRING); -+ dest->tls_hostname = params->tls_hostname->u.s; -+ } -+ -+ if (params->has_max_bandwidth) { -+ dest->max_bandwidth = params->max_bandwidth; -+ } -+ -+ if (params->has_downtime_limit) { -+ dest->downtime_limit = params->downtime_limit; -+ } -+ -+ if (params->has_x_checkpoint_delay) { -+ dest->x_checkpoint_delay = params->x_checkpoint_delay; -+ } -+ -+ if (params->has_block_incremental) { -+ dest->block_incremental = params->block_incremental; -+ } -+ if (params->has_multifd_channels) { -+ dest->multifd_channels = params->multifd_channels; -+ } -+ if (params->has_multifd_compression) { -+ dest->multifd_compression = params->multifd_compression; -+ } -+ if (params->has_xbzrle_cache_size) { -+ dest->xbzrle_cache_size = params->xbzrle_cache_size; -+ } -+ if (params->has_max_postcopy_bandwidth) { -+ dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; -+ } -+ if (params->has_max_cpu_throttle) { -+ dest->max_cpu_throttle = params->max_cpu_throttle; -+ } -+ if (params->has_announce_initial) { -+ dest->announce_initial = params->announce_initial; -+ } -+ if (params->has_announce_max) { -+ dest->announce_max = params->announce_max; -+ } -+ if (params->has_announce_rounds) { -+ dest->announce_rounds = params->announce_rounds; -+ } -+ if (params->has_announce_step) { -+ dest->announce_step = params->announce_step; -+ } -+ -+ if (params->has_block_bitmap_mapping) { -+ dest->has_block_bitmap_mapping = true; -+ dest->block_bitmap_mapping = params->block_bitmap_mapping; -+ } -+} -+ -+static void migrate_params_apply(MigrateSetParameters *params, Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ -+ if (params->has_compress_level) { -+ s->parameters.compress_level = params->compress_level; -+ } -+ -+ if (params->has_compress_threads) { -+ s->parameters.compress_threads = params->compress_threads; -+ } 
-+ -+ if (params->has_compress_wait_thread) { -+ s->parameters.compress_wait_thread = params->compress_wait_thread; -+ } -+ -+ if (params->has_decompress_threads) { -+ s->parameters.decompress_threads = params->decompress_threads; -+ } -+ -+ if (params->has_throttle_trigger_threshold) { -+ s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; -+ } -+ -+ if (params->has_cpu_throttle_initial) { -+ s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; -+ } -+ -+ if (params->has_cpu_throttle_increment) { -+ s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; -+ } -+ -+ if (params->has_cpu_throttle_tailslow) { -+ s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; -+ } -+ -+ if (params->tls_creds) { -+ g_free(s->parameters.tls_creds); -+ assert(params->tls_creds->type == QTYPE_QSTRING); -+ s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); -+ } -+ -+ if (params->tls_hostname) { -+ g_free(s->parameters.tls_hostname); -+ assert(params->tls_hostname->type == QTYPE_QSTRING); -+ s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); -+ } -+ -+ if (params->tls_authz) { -+ g_free(s->parameters.tls_authz); -+ assert(params->tls_authz->type == QTYPE_QSTRING); -+ s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); -+ } -+ -+ if (params->has_max_bandwidth) { -+ s->parameters.max_bandwidth = params->max_bandwidth; -+ if (s->to_dst_file && !migration_in_postcopy()) { -+ qemu_file_set_rate_limit(s->to_dst_file, -+ s->parameters.max_bandwidth / XFER_LIMIT_RATIO); -+ } -+ } -+ -+ if (params->has_downtime_limit) { -+ s->parameters.downtime_limit = params->downtime_limit; -+ } -+ -+ if (params->has_x_checkpoint_delay) { -+ s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; -+ if (migration_in_colo_state()) { -+ colo_checkpoint_notify(s); -+ } -+ } -+ -+ if (params->has_block_incremental) { -+ s->parameters.block_incremental = params->block_incremental; -+ } -+ if 
(params->has_multifd_channels) { -+ s->parameters.multifd_channels = params->multifd_channels; -+ } -+ if (params->has_multifd_compression) { -+ s->parameters.multifd_compression = params->multifd_compression; -+ } -+ if (params->has_xbzrle_cache_size) { -+ s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; -+ xbzrle_cache_resize(params->xbzrle_cache_size, errp); -+ } -+ if (params->has_max_postcopy_bandwidth) { -+ s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; -+ if (s->to_dst_file && migration_in_postcopy()) { -+ qemu_file_set_rate_limit(s->to_dst_file, -+ s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); -+ } -+ } -+ if (params->has_max_cpu_throttle) { -+ s->parameters.max_cpu_throttle = params->max_cpu_throttle; -+ } -+ if (params->has_announce_initial) { -+ s->parameters.announce_initial = params->announce_initial; -+ } -+ if (params->has_announce_max) { -+ s->parameters.announce_max = params->announce_max; -+ } -+ if (params->has_announce_rounds) { -+ s->parameters.announce_rounds = params->announce_rounds; -+ } -+ if (params->has_announce_step) { -+ s->parameters.announce_step = params->announce_step; -+ } -+ -+ if (params->has_block_bitmap_mapping) { -+ qapi_free_BitmapMigrationNodeAliasList( -+ s->parameters.block_bitmap_mapping); -+ -+ s->parameters.has_block_bitmap_mapping = true; -+ s->parameters.block_bitmap_mapping = -+ QAPI_CLONE(BitmapMigrationNodeAliasList, -+ params->block_bitmap_mapping); -+ } -+} -+ -+void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) -+{ -+ MigrationParameters tmp; -+ -+ /* TODO Rewrite "" to null instead */ -+ if (params->tls_creds -+ && params->tls_creds->type == QTYPE_QNULL) { -+ qobject_unref(params->tls_creds->u.n); -+ params->tls_creds->type = QTYPE_QSTRING; -+ params->tls_creds->u.s = strdup(""); -+ } -+ /* TODO Rewrite "" to null instead */ -+ if (params->tls_hostname -+ && params->tls_hostname->type == QTYPE_QNULL) { -+ 
qobject_unref(params->tls_hostname->u.n); -+ params->tls_hostname->type = QTYPE_QSTRING; -+ params->tls_hostname->u.s = strdup(""); -+ } -+ -+ migrate_params_test_apply(params, &tmp); -+ -+ if (!migrate_params_check(&tmp, errp)) { -+ /* Invalid parameter */ -+ return; -+ } -+ -+ migrate_params_apply(params, errp); -+} -diff --git a/migration/options.h b/migration/options.h -index 13318a16c7..89067e59a0 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -14,6 +14,13 @@ - #ifndef QEMU_MIGRATION_OPTIONS_H - #define QEMU_MIGRATION_OPTIONS_H - -+/* constants */ -+ -+/* Amount of time to allocate to each "chunk" of bandwidth-throttled -+ * data. */ -+#define BUFFER_DELAY 100 -+#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) -+ - /* capabilities */ - - bool migrate_auto_converge(void); -@@ -74,4 +81,8 @@ int migrate_multifd_zstd_level(void); - uint8_t migrate_throttle_trigger_threshold(void); - uint64_t migrate_xbzrle_cache_size(void); - -+/* parameters helpers */ -+ -+bool migrate_params_check(MigrationParameters *params, Error **errp); -+ - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch b/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch deleted file mode 100644 index d2564de..0000000 --- a/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 00cc3c3598828588619a7b3696819060bddaddb8 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:15:59 +0100 -Subject: [PATCH 34/56] migration: Move qmp_query_migrate_capabilities() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [33/50] dbfa8f1e7aa7e000b4622ce2da12d7d418710f19 
(peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 4d0c6b695bf5252402ebf967f83baebfd2f4b91e) -Signed-off-by: Peter Xu ---- - migration/migration.c | 22 ---------------------- - migration/options.c | 23 +++++++++++++++++++++++ - 2 files changed, 23 insertions(+), 22 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d9e30ca918..3dc8ee4875 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -886,28 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) - migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); - } - --MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) --{ -- MigrationCapabilityStatusList *head = NULL, **tail = &head; -- MigrationCapabilityStatus *caps; -- MigrationState *s = migrate_get_current(); -- int i; -- -- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { --#ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (i == MIGRATION_CAPABILITY_BLOCK) { -- continue; -- } --#endif -- caps = g_malloc0(sizeof(*caps)); -- caps->capability = i; -- caps->state = s->capabilities[i]; -- QAPI_LIST_APPEND(tail, caps); -- } -- -- return head; --} -- - MigrationParameters *qmp_query_migrate_parameters(Error **errp) - { - MigrationParameters *params; -diff --git a/migration/options.c b/migration/options.c -index 367c930f46..ff621bdeb3 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -13,6 +13,7 @@ - - #include "qemu/osdep.h" - #include "qapi/error.h" -+#include "qapi/qapi-commands-migration.h" - #include "sysemu/runstate.h" - #include "migration.h" - #include "ram.h" -@@ -390,3 +391,25 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - - return true; - } -+ -+MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) -+{ -+ MigrationCapabilityStatusList *head = NULL, **tail = &head; -+ MigrationCapabilityStatus *caps; -+ 
MigrationState *s = migrate_get_current(); -+ int i; -+ -+ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -+#ifndef CONFIG_LIVE_BLOCK_MIGRATION -+ if (i == MIGRATION_CAPABILITY_BLOCK) { -+ continue; -+ } -+#endif -+ caps = g_malloc0(sizeof(*caps)); -+ caps->capability = i; -+ caps->state = s->capabilities[i]; -+ QAPI_LIST_APPEND(tail, caps); -+ } -+ -+ return head; -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch b/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch deleted file mode 100644 index 7339ce0..0000000 --- a/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch +++ /dev/null @@ -1,226 +0,0 @@ -From 4782b59a8b0b5762f87505ac7a83b37ddd2e0b3f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 20:28:56 +0100 -Subject: [PATCH 19/56] migration: Pass migrate_caps_check() the old and new - caps -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [18/50] df78d680d03f15d7cb7401ad89e68a4fc93fa835 (peterx/qemu-kvm) - -We used to pass the old capabilities array and the new -capabilities as a list. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b02c7fc9ef447787414e6fa67eff75e7b7b30180) -Signed-off-by: Peter Xu ---- - migration/migration.c | 80 +++++++++++++++++-------------------------- - 1 file changed, 31 insertions(+), 49 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d8e5fb6226..e8f596bcfa 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1299,30 +1299,20 @@ WriteTrackingSupport migrate_query_write_tracking(void) - } - - /** -- * @migration_caps_check - check capability validity -+ * @migration_caps_check - check capability compatibility - * -- * @cap_list: old capability list, array of bool -- * @params: new capabilities to be applied soon -+ * @old_caps: old capability list -+ * @new_caps: new capability list - * @errp: set *errp if the check failed, with reason - * - * Returns true if check passed, otherwise false. - */ --static bool migrate_caps_check(bool *cap_list, -- MigrationCapabilityStatusList *params, -- Error **errp) -+static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - { -- MigrationCapabilityStatusList *cap; -- bool old_postcopy_cap; - MigrationIncomingState *mis = migration_incoming_get_current(); - -- old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -- -- for (cap = params; cap; cap = cap->next) { -- cap_list[cap->value->capability] = cap->value->state; -- } -- - #ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { -+ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { - error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " - "block migration"); - error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -@@ -1331,7 +1321,7 @@ static bool migrate_caps_check(bool *cap_list, - #endif - - #ifndef CONFIG_REPLICATION -- if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { -+ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { - error_setg(errp, "QEMU compiled without 
replication module" - " can't enable COLO"); - error_append_hint(errp, "Please enable replication before COLO.\n"); -@@ -1339,12 +1329,13 @@ static bool migrate_caps_check(bool *cap_list, - } - #endif - -- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - /* This check is reasonably expensive, so only when it's being - * set the first time, also it's only the destination that needs - * special support. - */ -- if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && -+ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -+ runstate_check(RUN_STATE_INMIGRATE) && - !postcopy_ram_supported_by_host(mis)) { - /* postcopy_ram_supported_by_host will have emitted a more - * detailed message -@@ -1353,13 +1344,13 @@ static bool migrate_caps_check(bool *cap_list, - return false; - } - -- if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -+ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { - error_setg(errp, "Postcopy is not compatible with ignore-shared"); - return false; - } - } - -- if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -+ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { - WriteTrackingSupport wt_support; - int idx; - /* -@@ -1383,7 +1374,7 @@ static bool migrate_caps_check(bool *cap_list, - */ - for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { - int incomp_cap = check_caps_background_snapshot.caps[idx]; -- if (cap_list[incomp_cap]) { -+ if (new_caps[incomp_cap]) { - error_setg(errp, - "Background-snapshot is not compatible with %s", - MigrationCapability_str(incomp_cap)); -@@ -1393,10 +1384,10 @@ static bool migrate_caps_check(bool *cap_list, - } - - #ifdef CONFIG_LINUX -- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -- (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || -- cap_list[MIGRATION_CAPABILITY_COMPRESS] || -- cap_list[MIGRATION_CAPABILITY_XBZRLE] || -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -+ 
(!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -+ new_caps[MIGRATION_CAPABILITY_COMPRESS] || -+ new_caps[MIGRATION_CAPABILITY_XBZRLE] || - migrate_multifd_compression() || - migrate_use_tls())) { - error_setg(errp, -@@ -1404,15 +1395,15 @@ static bool migrate_caps_check(bool *cap_list, - return false; - } - #else -- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { - error_setg(errp, - "Zero copy currently only available on Linux"); - return false; - } - #endif - -- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -- if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -+ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - error_setg(errp, "Postcopy preempt requires postcopy-ram"); - return false; - } -@@ -1423,14 +1414,14 @@ static bool migrate_caps_check(bool *cap_list, - * different compression channels, which is not compatible with the - * preempt assumptions on channel assignments. 
- */ -- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { - error_setg(errp, "Postcopy preempt not compatible with compress"); - return false; - } - } - -- if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) { -- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { -+ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { - error_setg(errp, "Multifd is not compatible with compress"); - return false; - } -@@ -1486,15 +1477,19 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - { - MigrationState *s = migrate_get_current(); - MigrationCapabilityStatusList *cap; -- bool cap_list[MIGRATION_CAPABILITY__MAX]; -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; - - if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } - -- memcpy(cap_list, s->capabilities, sizeof(cap_list)); -- if (!migrate_caps_check(cap_list, params, errp)) { -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ for (cap = params; cap; cap = cap->next) { -+ new_caps[cap->value->capability] = cap->value->state; -+ } -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { - return; - } - -@@ -4634,27 +4629,14 @@ static void migration_instance_init(Object *obj) - */ - static bool migration_object_check(MigrationState *ms, Error **errp) - { -- MigrationCapabilityStatusList *head = NULL; - /* Assuming all off */ -- bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; -- int i; -+ bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 }; - - if (!migrate_params_check(&ms->parameters, errp)) { - return false; - } - -- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (ms->capabilities[i]) { -- QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); -- } -- } -- -- ret = migrate_caps_check(cap_list, head, errp); -- -- /* It works with head == NULL */ -- qapi_free_MigrationCapabilityStatusList(head); -- -- return ret; -+ return migrate_caps_check(old_caps, 
ms->capabilities, errp); - } - - static const TypeInfo migration_type = { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch b/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch deleted file mode 100644 index 22acab5..0000000 --- a/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 3cecf66655a0dd599666bcac8add2dee85d5651f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 19 Apr 2023 18:16:05 +0200 -Subject: [PATCH 16/56] migration: Rename duplicate to zero_pages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [15/50] 89db3c8b167c0f411ba95ce2730540c0e8f1206b (peterx/qemu-kvm) - -Rest of counters that refer to pages has a _pages suffix. -And historically, this showed the number of pages composed of the same -character, here comes the name "duplicated". But since years ago, it -refers to the number of zero_pages. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 1a386e8de5995fb5478ea99baa6d3e71abcf4b80) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 10 +++++----- - migration/ram.h | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 39501a0ed8..c15e2a61ca 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1142,7 +1142,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram = g_malloc0(sizeof(*info->ram)); - info->ram->transferred = stat64_get(&ram_counters.transferred); - info->ram->total = ram_bytes_total(); -- info->ram->duplicate = stat64_get(&ram_counters.duplicate); -+ info->ram->duplicate = stat64_get(&ram_counters.zero_pages); - /* legacy value. It is not used anymore */ - info->ram->skipped = 0; - info->ram->normal = stat64_get(&ram_counters.normal); -diff --git a/migration/ram.c b/migration/ram.c -index fe69ecaef4..19d345a030 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1119,7 +1119,7 @@ uint64_t ram_pagesize_summary(void) - uint64_t ram_get_total_transferred_pages(void) - { - return stat64_get(&ram_counters.normal) + -- stat64_get(&ram_counters.duplicate) + -+ stat64_get(&ram_counters.zero_pages) + - compression_counters.pages + xbzrle_counters.pages; - } - -@@ -1320,7 +1320,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, - int len = save_zero_page_to_file(pss, f, block, offset); - - if (len) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - ram_transferred_add(len); - return 1; - } -@@ -1359,7 +1359,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - if (bytes_xmit > 0) { - stat64_add(&ram_counters.normal, 1); - } else if (bytes_xmit == 0) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - } - - return true; -@@ -1486,7 
+1486,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) - ram_transferred_add(bytes_xmit); - - if (param->zero_page) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - return; - } - -@@ -2621,7 +2621,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - uint64_t pages = size / TARGET_PAGE_SIZE; - - if (zero) { -- stat64_add(&ram_counters.duplicate, pages); -+ stat64_add(&ram_counters.zero_pages, pages); - } else { - stat64_add(&ram_counters.normal, pages); - ram_transferred_add(size); -diff --git a/migration/ram.h b/migration/ram.h -index afa68521d7..55258334fe 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -45,7 +45,7 @@ typedef struct { - Stat64 dirty_sync_count; - Stat64 dirty_sync_missed_zero_copy; - Stat64 downtime_bytes; -- Stat64 duplicate; -+ Stat64 zero_pages; - Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch b/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch deleted file mode 100644 index 8ad6447..0000000 --- a/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 7e27e7ea83856e1a7222ff46d91495f48fb6be4d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 19 Apr 2023 18:19:45 +0200 -Subject: [PATCH 17/56] migration: Rename normal to normal_pages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [16/50] 7df8b946918def9657bbe357861a6d72b5399ac6 (peterx/qemu-kvm) - -Rest of counters that refer to pages has a _pages suffix. -And historically, this showed the number of full pages transferred. 
-The name "normal" refered to the fact that they were sent without any -optimization (compression, xbzrle, zero_page, ...). - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 8c0cda8fa0de0a50148e2c60552afca9cffca643) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 10 +++++----- - migration/ram.h | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c15e2a61ca..f1b3439e5f 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1145,7 +1145,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->duplicate = stat64_get(&ram_counters.zero_pages); - /* legacy value. It is not used anymore */ - info->ram->skipped = 0; -- info->ram->normal = stat64_get(&ram_counters.normal); -+ info->ram->normal = stat64_get(&ram_counters.normal_pages); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = -diff --git a/migration/ram.c b/migration/ram.c -index 19d345a030..229714045a 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1118,7 +1118,7 @@ uint64_t ram_pagesize_summary(void) - - uint64_t ram_get_total_transferred_pages(void) - { -- return stat64_get(&ram_counters.normal) + -+ return stat64_get(&ram_counters.normal_pages) + - stat64_get(&ram_counters.zero_pages) + - compression_counters.pages + xbzrle_counters.pages; - } -@@ -1357,7 +1357,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - } - - if (bytes_xmit > 0) { -- stat64_add(&ram_counters.normal, 1); -+ stat64_add(&ram_counters.normal_pages, 1); - } else if (bytes_xmit == 0) { - stat64_add(&ram_counters.zero_pages, 1); - } -@@ -1391,7 +1391,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, - qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); - } - ram_transferred_add(TARGET_PAGE_SIZE); -- stat64_add(&ram_counters.normal, 1); -+ 
stat64_add(&ram_counters.normal_pages, 1); - return 1; - } - -@@ -1447,7 +1447,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, - if (multifd_queue_page(file, block, offset) < 0) { - return -1; - } -- stat64_add(&ram_counters.normal, 1); -+ stat64_add(&ram_counters.normal_pages, 1); - - return 1; - } -@@ -2623,7 +2623,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - if (zero) { - stat64_add(&ram_counters.zero_pages, pages); - } else { -- stat64_add(&ram_counters.normal, pages); -+ stat64_add(&ram_counters.normal_pages, pages); - ram_transferred_add(size); - qemu_file_credit_transfer(f, size); - } -diff --git a/migration/ram.h b/migration/ram.h -index 55258334fe..a6e0d70226 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -47,7 +47,7 @@ typedef struct { - Stat64 downtime_bytes; - Stat64 zero_pages; - Stat64 multifd_bytes; -- Stat64 normal; -+ Stat64 normal_pages; - Stat64 postcopy_bytes; - Stat64 postcopy_requests; - Stat64 precopy_bytes; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch b/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch deleted file mode 100644 index 7e78d82..0000000 --- a/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch +++ /dev/null @@ -1,52 +0,0 @@ -From c0d377e1bf442a09b82fddbb8588fcddf6439854 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 24 Nov 2022 17:26:19 +0100 -Subject: [PATCH 09/56] migration: Update atomic stats out of the mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [8/50] 88e9dbc9a3e5aef60a7c98c871144904c7062b1f (peterx/qemu-kvm) - -Reviewed-by: David Edmondson -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela 
-(cherry picked from commit 30fb22cda45bea43a3c0e26049ebdd71a9503ffd) -Signed-off-by: Peter Xu ---- - migration/multifd.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index 01fab01a92..6ef3a27938 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -433,8 +433,8 @@ static int multifd_send_pages(QEMUFile *f) - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); - ram_counters.multifd_bytes += transferred; -- stat64_add(&ram_counters.transferred, transferred); - qemu_mutex_unlock(&p->mutex); -+ stat64_add(&ram_counters.transferred, transferred); - qemu_sem_post(&p->sem); - - return 1; -@@ -628,8 +628,8 @@ int multifd_send_sync_main(QEMUFile *f) - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); - ram_counters.multifd_bytes += p->packet_len; -- stat64_add(&ram_counters.transferred, p->packet_len); - qemu_mutex_unlock(&p->mutex); -+ stat64_add(&ram_counters.transferred, p->packet_len); - qemu_sem_post(&p->sem); - } - for (i = 0; i < migrate_multifd_channels(); i++) { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch b/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch deleted file mode 100644 index f179761..0000000 --- a/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 8d203baa6cbd1f371e308c2c9d59a5ca7d29dca8 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:55:30 +0100 -Subject: [PATCH 38/56] migration: Use migrate_max_postcopy_bandwidth() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [37/50] 
d62948e9ee40a85ed9b460a583c3b0e43cd5d47f (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5390adec03a7d8bc6bcf5887f726b0ddaeb90681) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 7f2e770deb..78bca9a93f 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3799,7 +3799,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - - if (resume) { - /* This is a resumed migration */ -- rate_limit = s->parameters.max_postcopy_bandwidth / -+ rate_limit = migrate_max_postcopy_bandwidth() / - XFER_LIMIT_RATIO; - } else { - /* This is a fresh new migration */ --- -2.39.1 - diff --git a/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch b/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch deleted file mode 100644 index 9451696..0000000 --- a/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch +++ /dev/null @@ -1,153 +0,0 @@ -From cfdf5715a2334ad06b5966ec986d134bbd5ba08b Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Dec 2022 12:48:16 +0100 -Subject: [PATCH 05/56] migration: mark mixed functions that can suspend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [4/50] 9f055b526edd06a3440999d5de91e5d624678c7d (peterx/qemu-kvm) - -There should be no paths from a coroutine_fn to aio_poll, however in -practice coroutine_mixed_fn will call aio_poll in the !qemu_in_coroutine() -path. 
By marking mixed functions, we can track accurately the call paths -that execute entirely in coroutine context, and find more missing -coroutine_fn markers. This results in more accurate checks that -coroutine code does not end up blocking. - -If the marking were extended transitively to all functions that call -these ones, static analysis could be done much more efficiently. -However, this is a start and makes it possible to use vrc's path-based -searches to find potential bugs where coroutine_fns call blocking functions. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 394b9407e4c515f96df6647d629ee28cbb86f07c) -Signed-off-by: Peter Xu ---- - include/migration/qemu-file-types.h | 4 ++-- - migration/qemu-file.c | 14 +++++++------- - migration/qemu-file.h | 6 +++--- - 3 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h -index 2867e3da84..1436f9ce92 100644 ---- a/include/migration/qemu-file-types.h -+++ b/include/migration/qemu-file-types.h -@@ -35,7 +35,7 @@ void qemu_put_byte(QEMUFile *f, int v); - void qemu_put_be16(QEMUFile *f, unsigned int v); - void qemu_put_be32(QEMUFile *f, unsigned int v); - void qemu_put_be64(QEMUFile *f, uint64_t v); --size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); -+size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); - - int qemu_get_byte(QEMUFile *f); - -@@ -161,7 +161,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) - qemu_get_be64s(f, (uint64_t *)pv); - } - --size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); -+size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]); - - void qemu_put_counted_string(QEMUFile *f, const char *name); - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 102ab3b439..ee04240a21 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -392,7 +392,7 @@ size_t ram_control_save_page(QEMUFile 
*f, ram_addr_t block_offset, - * case if the underlying file descriptor gives a short read, and that can - * happen even on a blocking fd. - */ --static ssize_t qemu_fill_buffer(QEMUFile *f) -+static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) - { - int len; - int pending; -@@ -585,7 +585,7 @@ void qemu_file_skip(QEMUFile *f, int size) - * return as many as it managed to read (assuming blocking fd's which - * all current QEMUFile are) - */ --size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) -+size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) - { - ssize_t pending; - size_t index; -@@ -633,7 +633,7 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) - * return as many as it managed to read (assuming blocking fd's which - * all current QEMUFile are) - */ --size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) -+size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) - { - size_t pending = size; - size_t done = 0; -@@ -674,7 +674,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) - * Note: Since **buf may get changed, the caller should take care to - * keep a pointer to the original buffer if it needs to deallocate it. - */ --size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) -+size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) - { - if (size < IO_BUF_SIZE) { - size_t res; -@@ -696,7 +696,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) - * Peeks a single byte from the buffer; this isn't guaranteed to work if - * offset leaves a gap after the previous read/peeked data. 
- */ --int qemu_peek_byte(QEMUFile *f, int offset) -+int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset) - { - int index = f->buf_index + offset; - -@@ -713,7 +713,7 @@ int qemu_peek_byte(QEMUFile *f, int offset) - return f->buf[index]; - } - --int qemu_get_byte(QEMUFile *f) -+int coroutine_mixed_fn qemu_get_byte(QEMUFile *f) - { - int result; - -@@ -894,7 +894,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) - * else 0 - * (Note a 0 length string will return 0 either way) - */ --size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) -+size_t coroutine_fn qemu_get_counted_string(QEMUFile *f, char buf[256]) - { - size_t len = qemu_get_byte(f); - size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); -diff --git a/migration/qemu-file.h b/migration/qemu-file.h -index 9d0155a2a1..d16cd50448 100644 ---- a/migration/qemu-file.h -+++ b/migration/qemu-file.h -@@ -108,8 +108,8 @@ bool qemu_file_is_writable(QEMUFile *f); - - #include "migration/qemu-file-types.h" - --size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); --size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); -+size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); -+size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); - ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, - const uint8_t *p, size_t size); - int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); -@@ -119,7 +119,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); - * is; you aren't guaranteed to be able to peak to +n bytes unless you've - * previously peeked +n-1. 
- */ --int qemu_peek_byte(QEMUFile *f, int offset); -+int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); - void qemu_file_skip(QEMUFile *f, int size); - /* - * qemu_file_credit_transfer: --- -2.39.1 - diff --git a/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch b/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch deleted file mode 100644 index 4e73c80..0000000 --- a/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 96e6914cbfb18bb8287c57b9ac9a6b364d3e7a22 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 22 Feb 2023 17:18:05 +0100 -Subject: [PATCH 20/56] migration: move migration_global_dump() to - migration-hmp-cmds.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [19/50] c8d330a2833c706b9bd78f7154be882e3977ad06 (peterx/qemu-kvm) - -It is only used there, so we can make it static. -Once there, remove spice.h that it is not used. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -Reviewed-by: Philippe Mathieu-Daudé - ---- - -fix David Edmonson ui/qemu-spice.h unintended removal - -(cherry picked from commit c938157713e723165a42cb6e8364adb6fcbd0e22) -Signed-off-by: Peter Xu ---- - include/migration/misc.h | 1 - - migration/migration-hmp-cmds.c | 22 +++++++++++++++++++++- - migration/migration.c | 19 ------------------- - 3 files changed, 21 insertions(+), 21 deletions(-) - -diff --git a/include/migration/misc.h b/include/migration/misc.h -index 8b49841016..5ebe13b4b9 100644 ---- a/include/migration/misc.h -+++ b/include/migration/misc.h -@@ -66,7 +66,6 @@ bool migration_has_finished(MigrationState *); - bool migration_has_failed(MigrationState *); - /* ...and after the device transmission */ - bool migration_in_postcopy_after_devices(MigrationState *); --void migration_global_dump(Monitor *mon); - /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */ - bool migration_in_incoming_postcopy(void); - /* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */ -diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c -index 72519ea99f..71da91967a 100644 ---- a/migration/migration-hmp-cmds.c -+++ b/migration/migration-hmp-cmds.c -@@ -15,7 +15,6 @@ - - #include "qemu/osdep.h" - #include "block/qapi.h" --#include "migration/misc.h" - #include "migration/snapshot.h" - #include "monitor/hmp.h" - #include "monitor/monitor.h" -@@ -30,6 +29,27 @@ - #include "qemu/sockets.h" - #include "sysemu/runstate.h" - #include "ui/qemu-spice.h" -+#include "sysemu/sysemu.h" -+#include "migration.h" -+ -+static void migration_global_dump(Monitor *mon) -+{ -+ MigrationState *ms = migrate_get_current(); -+ -+ monitor_printf(mon, "globals:\n"); -+ monitor_printf(mon, "store-global-state: %s\n", -+ ms->store_global_state ? "on" : "off"); -+ monitor_printf(mon, "only-migratable: %s\n", -+ only_migratable ? 
"on" : "off"); -+ monitor_printf(mon, "send-configuration: %s\n", -+ ms->send_configuration ? "on" : "off"); -+ monitor_printf(mon, "send-section-footer: %s\n", -+ ms->send_section_footer ? "on" : "off"); -+ monitor_printf(mon, "decompress-error-check: %s\n", -+ ms->decompress_error_check ? "on" : "off"); -+ monitor_printf(mon, "clear-bitmap-shift: %u\n", -+ ms->clear_bitmap_shift); -+} - - void hmp_info_migrate(Monitor *mon, const QDict *qdict) - { -diff --git a/migration/migration.c b/migration/migration.c -index e8f596bcfa..aa96ffdc5b 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -4420,25 +4420,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - s->migration_thread_running = true; - } - --void migration_global_dump(Monitor *mon) --{ -- MigrationState *ms = migrate_get_current(); -- -- monitor_printf(mon, "globals:\n"); -- monitor_printf(mon, "store-global-state: %s\n", -- ms->store_global_state ? "on" : "off"); -- monitor_printf(mon, "only-migratable: %s\n", -- only_migratable ? "on" : "off"); -- monitor_printf(mon, "send-configuration: %s\n", -- ms->send_configuration ? "on" : "off"); -- monitor_printf(mon, "send-section-footer: %s\n", -- ms->send_section_footer ? "on" : "off"); -- monitor_printf(mon, "decompress-error-check: %s\n", -- ms->decompress_error_check ? 
"on" : "off"); -- monitor_printf(mon, "clear-bitmap-shift: %u\n", -- ms->clear_bitmap_shift); --} -- - #define DEFINE_PROP_MIG_CAP(name, x) \ - DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch b/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch deleted file mode 100644 index 7700466..0000000 --- a/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 4827d5be5357ab89e0c46f606ad828bf97d36471 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:38 -0400 -Subject: [PATCH 04/56] migration/postcopy: Detect file system on dest host -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [3/50] 121aeeda8a019f79dba6c077c7018bd1c86f3d71 (peterx/qemu-kvm) - -Postcopy requires the memory support userfaultfd to work. Right now we -check it but it's a bit too late (when switching to postcopy migration). - -Do that early right at enabling of postcopy. - -Note that this is still only a best effort because ramblocks can be -dynamically created. We can add check in hostmem creations and fail if -postcopy enabled, but maybe that's too aggressive. - -Still, we have chance to fail the most obvious where we know there's an -existing unsupported ramblock. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit ae30b9b2892b85e6c3d5c0b8d1949c4d77a2954a) -Signed-off-by: Peter Xu ---- - migration/postcopy-ram.c | 34 ++++++++++++++++++++++++++++++---- - 1 file changed, 30 insertions(+), 4 deletions(-) - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 93f39f8e06..bbb8af61ae 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -36,6 +36,7 @@ - #include "yank_functions.h" - #include "tls.h" - #include "qemu/userfaultfd.h" -+#include "qemu/mmap-alloc.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -@@ -336,11 +337,12 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - - /* Callback from postcopy_ram_supported_by_host block iterator. - */ --static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) -+static int test_ramblock_postcopiable(RAMBlock *rb) - { - const char *block_name = qemu_ram_get_idstr(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); - size_t pagesize = qemu_ram_pagesize(rb); -+ QemuFsType fs; - - if (length % pagesize) { - error_report("Postcopy requires RAM blocks to be a page size multiple," -@@ -348,6 +350,15 @@ static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) - "page size of 0x%zx", block_name, length, pagesize); - return 1; - } -+ -+ if (rb->fd >= 0) { -+ fs = qemu_fd_getfs(rb->fd); -+ if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { -+ error_report("Host backend files need to be TMPFS or HUGETLBFS only"); -+ return 1; -+ } -+ } -+ - return 0; - } - -@@ -366,6 +377,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - struct uffdio_range range_struct; - uint64_t feature_mask; - Error *local_err = NULL; -+ RAMBlock *block; - - if (qemu_target_page_size() > pagesize) { - error_report("Target page size bigger than host page size"); -@@ -390,9 +402,23 @@ bool 
postcopy_ram_supported_by_host(MigrationIncomingState *mis) - goto out; - } - -- /* We don't support postcopy with shared RAM yet */ -- if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) { -- goto out; -+ /* -+ * We don't support postcopy with some type of ramblocks. -+ * -+ * NOTE: we explicitly ignored ramblock_is_ignored() instead we checked -+ * all possible ramblocks. This is because this function can be called -+ * when creating the migration object, during the phase RAM_MIGRATABLE -+ * is not even properly set for all the ramblocks. -+ * -+ * A side effect of this is we'll also check against RAM_SHARED -+ * ramblocks even if migrate_ignore_shared() is set (in which case -+ * we'll never migrate RAM_SHARED at all), but normally this shouldn't -+ * affect in reality, or we can revisit. -+ */ -+ RAMBLOCK_FOREACH(block) { -+ if (test_ramblock_postcopiable(block)) { -+ goto out; -+ } - } - - /* --- -2.39.1 - diff --git a/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch b/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch deleted file mode 100644 index 88eb791..0000000 --- a/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 93c9a1ae812720d3a29980a3c5fcfc1e916993de Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?=E6=9D=8E=E7=9A=86=E4=BF=8A?= -Date: Fri, 17 Mar 2023 09:57:13 +0000 -Subject: [PATCH 07/56] migration: remove extra whitespace character for code - style -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [6/50] bc1cd812f8dfc18e47e1644b5333c703eae23d2d (peterx/qemu-kvm) - -Fix code style. 
- -Signed-off-by: 李皆俊 -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 8ebb6ecc3798e66a9ba98355983762bedfa1b72d) -Signed-off-by: Peter Xu ---- - migration/ram.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 79d881f735..0e68099bf9 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3293,7 +3293,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - - migration_ops = g_malloc0(sizeof(MigrationOps)); - migration_ops->ram_save_target_page = ram_save_target_page_legacy; -- ret = multifd_send_sync_main(f); -+ ret = multifd_send_sync_main(f); - if (ret < 0) { - return ret; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch b/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch deleted file mode 100644 index 52b19b3..0000000 --- a/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch +++ /dev/null @@ -1,329 +0,0 @@ -From ee566ec12099992f9134bda1db92dd568427245a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 18:26:59 +0100 -Subject: [PATCH 18/56] migration: rename enabled_capabilities to capabilities -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [17/50] 841a27addf273d8f559bc8ebd2c854200e8ca673 (peterx/qemu-kvm) - -It is clear from the context what that means, and such a long name -with the extra long names of the capabilities make very difficilut to -stay inside the 80 columns limit. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 0cec2056ff67557c18d7b8ab1b70ab47c9e31f2f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 52 +++++++++++++++++++++---------------------- - migration/migration.h | 2 +- - migration/rdma.c | 4 ++-- - migration/savevm.c | 6 ++--- - 4 files changed, 31 insertions(+), 33 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f1b3439e5f..d8e5fb6226 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -364,8 +364,7 @@ static bool migrate_late_block_activate(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[ -- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; -+ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - - /* -@@ -944,7 +943,7 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - #endif - caps = g_malloc0(sizeof(*caps)); - caps->capability = i; -- caps->state = s->enabled_capabilities[i]; -+ caps->state = s->capabilities[i]; - QAPI_LIST_APPEND(tail, caps); - } - -@@ -1494,13 +1493,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - return; - } - -- memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); -+ memcpy(cap_list, s->capabilities, sizeof(cap_list)); - if (!migrate_caps_check(cap_list, params, errp)) { - return; - } - - for (cap = params; cap; cap = cap->next) { -- s->enabled_capabilities[cap->value->capability] = cap->value->state; -+ s->capabilities[cap->value->capability] = cap->value->state; - } - } - -@@ -2569,7 +2568,7 @@ bool migrate_release_ram(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; -+ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - - bool migrate_postcopy_ram(void) -@@ -2578,7 +2577,7 @@ bool migrate_postcopy_ram(void) - - s = migrate_get_current(); - -- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } - - bool migrate_postcopy(void) -@@ -2592,7 +2591,7 @@ bool migrate_auto_converge(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; -+ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; - } - - bool migrate_zero_blocks(void) -@@ -2601,7 +2600,7 @@ bool migrate_zero_blocks(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } - - bool migrate_postcopy_blocktime(void) -@@ -2610,7 +2609,7 @@ bool migrate_postcopy_blocktime(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; - } - - bool migrate_use_compression(void) -@@ -2619,7 +2618,7 @@ bool migrate_use_compression(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; -+ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; - } - - int migrate_compress_level(void) -@@ -2664,7 +2663,7 @@ bool migrate_dirty_bitmaps(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; -+ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - - bool migrate_ignore_shared(void) -@@ -2673,7 +2672,7 @@ bool migrate_ignore_shared(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; -+ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; - } - - bool migrate_validate_uuid(void) -@@ -2682,7 +2681,7 @@ bool migrate_validate_uuid(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; -+ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - - bool 
migrate_use_events(void) -@@ -2691,7 +2690,7 @@ bool migrate_use_events(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; -+ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; - } - - bool migrate_use_multifd(void) -@@ -2700,7 +2699,7 @@ bool migrate_use_multifd(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; -+ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - - bool migrate_pause_before_switchover(void) -@@ -2709,8 +2708,7 @@ bool migrate_pause_before_switchover(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[ -- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; -+ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; - } - - int migrate_multifd_channels(void) -@@ -2757,7 +2755,7 @@ bool migrate_use_zero_copy_send(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } - #endif - -@@ -2776,7 +2774,7 @@ int migrate_use_xbzrle(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; -+ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; - } - - uint64_t migrate_xbzrle_cache_size(void) -@@ -2803,7 +2801,7 @@ bool migrate_use_block(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; -+ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; - } - - bool migrate_use_return_path(void) -@@ -2812,7 +2810,7 @@ bool migrate_use_return_path(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; -+ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - - bool migrate_use_block_incremental(void) -@@ -2830,7 +2828,7 @@ bool migrate_background_snapshot(void) - - s = migrate_get_current(); - -- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; -+ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - - bool migrate_postcopy_preempt(void) -@@ -2839,7 +2837,7 @@ bool migrate_postcopy_preempt(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; - } - - /* migration thread support */ -@@ -3584,7 +3582,7 @@ fail: - bool migrate_colo_enabled(void) - { - MigrationState *s = migrate_get_current(); -- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; -+ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - - typedef enum MigThrError { -@@ -4447,7 +4445,7 @@ void migration_global_dump(Monitor *mon) - } - - #define DEFINE_PROP_MIG_CAP(name, x) \ -- DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) -+ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) - - static Property migration_properties[] = { - DEFINE_PROP_BOOL("store-global-state", MigrationState, -@@ -4646,7 +4644,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp) - } - - for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (ms->enabled_capabilities[i]) { -+ if (ms->capabilities[i]) { - QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); - } - } -diff --git a/migration/migration.h b/migration/migration.h -index 310ae8901b..04e0860b4e 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -310,7 +310,7 @@ struct MigrationState { - int64_t downtime_start; - int64_t downtime; - int64_t expected_downtime; -- bool enabled_capabilities[MIGRATION_CAPABILITY__MAX]; -+ bool capabilities[MIGRATION_CAPABILITY__MAX]; - int64_t setup_time; - /* - * Whether guest was running when we enter the completion stage. 
-diff --git a/migration/rdma.c b/migration/rdma.c -index df646be35e..f35f021963 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -4179,7 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma, -- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); - - if (ret) { - goto err; -@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma_return_path, -- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); - - if (ret) { - goto return_path_err; -diff --git a/migration/savevm.c b/migration/savevm.c -index aa54a67fda..589ef926ab 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -253,7 +253,7 @@ static uint32_t get_validatable_capabilities_count(void) - uint32_t result = 0; - int i; - for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (should_validate_capability(i) && s->enabled_capabilities[i]) { -+ if (should_validate_capability(i) && s->capabilities[i]) { - result++; - } - } -@@ -275,7 +275,7 @@ static int configuration_pre_save(void *opaque) - state->capabilities = g_renew(MigrationCapability, state->capabilities, - state->caps_count); - for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (should_validate_capability(i) && s->enabled_capabilities[i]) { -+ if (should_validate_capability(i) && s->capabilities[i]) { - state->capabilities[j++] = i; - } - } -@@ -325,7 +325,7 @@ static bool configuration_validate_capabilities(SaveState *state) - continue; - } - source_state = test_bit(i, source_caps_bm); -- target_state = s->enabled_capabilities[i]; -+ target_state = s->capabilities[i]; - if (source_state != target_state) { - error_report("Capability %s is %s, but received capability is %s", - MigrationCapability_str(i), --- -2.39.1 - diff --git 
a/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch b/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch deleted file mode 100644 index 0bebd2e..0000000 --- a/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 2a5ea92ca0a5dffad54e4d06a683f683996cea9a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 12:13:14 +0200 -Subject: [PATCH 05/12] multifd: Create property - multifd-flush-after-each-section -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: [1/3] 5bf5348e8be5b1d1629b859ce1ddb7aa0d72c0d6 (juan.quintela/c9s-qemu-kvm) - -We used to flush all channels at the end of each RAM section -sent. That is not needed, so preparing to only flush after a full -iteration through all the RAM. - -Default value of the property is false. But we return "true" in -migrate_multifd_flush_after_each_section() until we implement the code -in following patches. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. 
David Alan Gilbert -Acked-by: Peter Xu - ---- - -Rename each-iteration to after-each-section -Rename multifd-sync-after-each-section to - multifd-flush-after-each-section -Move to machine-8.0 (peter) - -conflit hw_compat_8_0 and hw_compat_rhel_9_2 - -(cherry picked from commit 77c259a4cb1c9799754b48f570301ebf1de5ded8) ---- - hw/core/machine.c | 2 ++ - migration/migration.h | 12 ++++++++++++ - migration/options.c | 13 +++++++++++++ - migration/options.h | 1 + - 4 files changed, 28 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5abdc8c39b..5ea52317b9 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -54,6 +54,8 @@ const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - - GlobalProperty hw_compat_rhel_9_2[] = { -+ /* hw_compat_rhel_9_2 from hw_compat_8_0 */ -+ { "migration", "multifd-flush-after-each-section", "on"}, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ - { "e1000e", "migrate-timadj", "off" }, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -diff --git a/migration/migration.h b/migration/migration.h -index 7ccf460aa2..04c78c1fd6 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -411,6 +411,18 @@ struct MigrationState { - */ - bool preempt_pre_7_2; - -+ /* -+ * flush every channel after each section sent. -+ * -+ * This assures that we can't mix pages from one iteration through -+ * ram pages with pages for the following iteration. We really -+ * only need to do this flush after we have go through all the -+ * dirty pages. For historical reasons, we do that after each -+ * section. This is suboptimal (we flush too many times). -+ * Default value is false. Setting this property has no effect -+ * until the patch that removes this comment. (since 8.1) -+ */ -+ bool multifd_flush_after_each_section; - /* - * This decides the size of guest memory chunk that will be used - * to track dirty bitmap clearing. 
The size of memory chunk will -diff --git a/migration/options.c b/migration/options.c -index ccd7ef3907..5b0d080ecb 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -88,6 +88,8 @@ Property migration_properties[] = { - send_section_footer, true), - DEFINE_PROP_BOOL("decompress-error-check", MigrationState, - decompress_error_check, true), -+ DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, -+ multifd_flush_after_each_section, true), - DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, - clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), - DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -@@ -344,6 +346,17 @@ bool migrate_zero_copy_send(void) - - /* pseudo capabilities */ - -+bool migrate_multifd_flush_after_each_section(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ /* -+ * Until the patch that remove this comment, we always return that -+ * the property is enabled. -+ */ -+ return true || s->multifd_flush_after_each_section; -+} -+ - bool migrate_postcopy(void) - { - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); -diff --git a/migration/options.h b/migration/options.h -index 0fc7be6869..271f49ae5f 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -60,6 +60,7 @@ bool migrate_zero_copy_send(void); - * check, but they are not a capability. 
- */ - -+bool migrate_multifd_flush_after_each_section(void); - bool migrate_postcopy(void); - bool migrate_tls(void); - --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch b/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch deleted file mode 100644 index abf21e6..0000000 --- a/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch +++ /dev/null @@ -1,58 +0,0 @@ -From af6f2a543c7db6d67d33fd12615a50e57fc3fe66 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 26 Apr 2023 12:20:36 +0200 -Subject: [PATCH 19/21] multifd: Fix the number of channels ready -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 171: multifd: Fix the number of channels ready -RH-Bugzilla: 2196289 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] a5e271ba249d85b27a68d3cff10480ca3a112c5d (LeoBras/centos-qemu-kvm) - -We don't wait in the sem when we are doing a sync_main. Make it wait -there. To make things clearer, we mark the channel ready at the -begining of the thread loop. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit d2026ee117147893f8d80f060cede6d872ecbd7f) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index cce3ad6988..6a59c03dd2 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -635,6 +635,7 @@ int multifd_send_sync_main(QEMUFile *f) - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -+ qemu_sem_wait(&multifd_send_state->channels_ready); - trace_multifd_send_sync_main_wait(p->id); - qemu_sem_wait(&p->sem_sync); - -@@ -668,6 +669,7 @@ static void *multifd_send_thread(void *opaque) - p->num_packets = 1; - - while (true) { -+ qemu_sem_post(&multifd_send_state->channels_ready); - qemu_sem_wait(&p->sem); - - if (qatomic_read(&multifd_send_state->exiting)) { -@@ -736,7 +738,6 @@ static void *multifd_send_thread(void *opaque) - if (flags & MULTIFD_FLAG_SYNC) { - qemu_sem_post(&p->sem_sync); - } -- qemu_sem_post(&multifd_send_state->channels_ready); - } else if (p->quit) { - qemu_mutex_unlock(&p->mutex); - break; --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch b/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch deleted file mode 100644 index 3f76384..0000000 --- a/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch +++ /dev/null @@ -1,166 +0,0 @@ -From e6f770506091eada46c63ac1c8b934b508e3807f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 13:36:11 +0200 -Subject: [PATCH 07/12] multifd: Only flush once each full round of memory -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: 
[3/3] 33f76dfc72a2552a42dc7f0fe3923564185a7bf7 (juan.quintela/c9s-qemu-kvm) - -We need to add a new flag to mean to flush at that point. -Notice that we still flush at the end of setup and at the end of -complete stages. - -Signed-off-by: Juan Quintela -Acked-by: Peter Xu - ---- - -Add missing qemu_fflush(), now it passes all tests always. -In the previous version, the check that changes the default value to -false got lost in some rebase. Get it back. - -(cherry picked from commit 294e5a4034e81b3d8db03b4e0f691386f20d6ed3) ---- - migration/migration.h | 3 +-- - migration/options.c | 8 ++------ - migration/ram.c | 28 +++++++++++++++++++++++++++- - 3 files changed, 30 insertions(+), 9 deletions(-) - -diff --git a/migration/migration.h b/migration/migration.h -index 04c78c1fd6..dfec649af8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -419,8 +419,7 @@ struct MigrationState { - * only need to do this flush after we have go through all the - * dirty pages. For historical reasons, we do that after each - * section. This is suboptimal (we flush too many times). -- * Default value is false. Setting this property has no effect -- * until the patch that removes this comment. (since 8.1) -+ * Default value is false. 
(since 8.1) - */ - bool multifd_flush_after_each_section; - /* -diff --git a/migration/options.c b/migration/options.c -index 5b0d080ecb..e13c7cb8e5 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -89,7 +89,7 @@ Property migration_properties[] = { - DEFINE_PROP_BOOL("decompress-error-check", MigrationState, - decompress_error_check, true), - DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, -- multifd_flush_after_each_section, true), -+ multifd_flush_after_each_section, false), - DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, - clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), - DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -@@ -350,11 +350,7 @@ bool migrate_multifd_flush_after_each_section(void) - { - MigrationState *s = migrate_get_current(); - -- /* -- * Until the patch that remove this comment, we always return that -- * the property is enabled. -- */ -- return true || s->multifd_flush_after_each_section; -+ return s->multifd_flush_after_each_section; - } - - bool migrate_postcopy(void) -diff --git a/migration/ram.c b/migration/ram.c -index 1e2414d681..e9dcda8b9d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -86,6 +86,7 @@ - #define RAM_SAVE_FLAG_XBZRLE 0x40 - /* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */ - #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 -+#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 - /* We can't use any flag that is bigger than 0x200 */ - - int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, -@@ -1581,6 +1582,7 @@ retry: - * associated with the search process. 
- * - * Returns: -+ * <0: An error happened - * PAGE_ALL_CLEAN: no dirty page found, give up - * PAGE_TRY_AGAIN: no dirty page found, retry for next block - * PAGE_DIRTY_FOUND: dirty page found -@@ -1608,6 +1610,15 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) - pss->page = 0; - pss->block = QLIST_NEXT_RCU(pss->block, next); - if (!pss->block) { -+ if (!migrate_multifd_flush_after_each_section()) { -+ QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; -+ int ret = multifd_send_sync_main(f); -+ if (ret < 0) { -+ return ret; -+ } -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ qemu_fflush(f); -+ } - /* - * If memory migration starts over, we will meet a dirtied page - * which may still exists in compression threads's ring, so we -@@ -2600,6 +2611,9 @@ static int ram_find_and_save_block(RAMState *rs) - break; - } else if (res == PAGE_TRY_AGAIN) { - continue; -+ } else if (res < 0) { -+ pages = res; -+ break; - } - } - } -@@ -3286,6 +3300,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - return ret; - } - -+ if (!migrate_multifd_flush_after_each_section()) { -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ } -+ - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -3471,6 +3489,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - return ret; - } - -+ if (!migrate_multifd_flush_after_each_section()) { -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ } - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -4152,7 +4173,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) - } - decompress_data_with_multi_threads(f, page_buffer, len); - break; -- -+ case RAM_SAVE_FLAG_MULTIFD_FLUSH: -+ multifd_recv_sync_main(); -+ break; - case RAM_SAVE_FLAG_EOS: - /* normal exit */ - if (migrate_multifd_flush_after_each_section()) { -@@ -4426,6 +4449,9 @@ static int ram_load_precopy(QEMUFile *f) - break; - } - break; -+ case RAM_SAVE_FLAG_MULTIFD_FLUSH: -+ multifd_recv_sync_main(); -+ break; - case 
RAM_SAVE_FLAG_EOS: - /* normal exit */ - if (migrate_multifd_flush_after_each_section()) { --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch b/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch deleted file mode 100644 index 779841f..0000000 --- a/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch +++ /dev/null @@ -1,78 +0,0 @@ -From c4bfb4900b95e13bef2d86b83c33786c7c4f6289 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 12:21:32 +0200 -Subject: [PATCH 06/12] multifd: Protect multifd_send_sync_main() calls -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: [2/3] a91adf59c6b2f39bf4a308f566b00e39cae6e0ae (juan.quintela/c9s-qemu-kvm) - -We only need to do that on the ram_save_iterate() call on sending and -on destination when we get a RAM_SAVE_FLAG_EOS. - -In setup() and complete() we need to synch in both new and old cases, -so don't add a check there. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Acked-by: Peter Xu - ---- - -Remove the wrappers that we take out on patch 5. 
- -(cherry picked from commit b05292c237030343516d073b1a1e5f49ffc017a8) ---- - migration/ram.c | 16 +++++++++++----- - 1 file changed, 11 insertions(+), 5 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 01356f60a4..1e2414d681 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3394,9 +3394,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - out: - if (ret >= 0 - && migration_is_setup_or_active(migrate_get_current()->state)) { -- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); -- if (ret < 0) { -- return ret; -+ if (migrate_multifd_flush_after_each_section()) { -+ ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); -+ if (ret < 0) { -+ return ret; -+ } - } - - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -@@ -4153,7 +4155,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) - - case RAM_SAVE_FLAG_EOS: - /* normal exit */ -- multifd_recv_sync_main(); -+ if (migrate_multifd_flush_after_each_section()) { -+ multifd_recv_sync_main(); -+ } - break; - default: - error_report("Unknown combination of migration flags: 0x%x" -@@ -4424,7 +4428,9 @@ static int ram_load_precopy(QEMUFile *f) - break; - case RAM_SAVE_FLAG_EOS: - /* normal exit */ -- multifd_recv_sync_main(); -+ if (migrate_multifd_flush_after_each_section()) { -+ multifd_recv_sync_main(); -+ } - break; - default: - if (flags & RAM_SAVE_FLAG_HOOK) { --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch new file mode 100644 index 0000000..f65d293 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch @@ -0,0 +1,101 @@ +From 6f60c86c5dd747ba68cb4a11084e7b021769e70b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 22 Aug 2024 09:35:29 -0500 +Subject: [PATCH] nbd/server: CVE-2024-7409: Avoid use-after-free when closing + server + +RH-Author: Eric Blake +RH-MergeRequest: 266: nbd/server: 
CVE-2024-7409: Avoid use-after-free when closing server +RH-Jira: RHEL-52617 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/1] e6e12c985cd13dd14336d98ab0719c789b5e914d (ebblake/centos-qemu-kvm) + +Commit 3e7ef738 plugged the use-after-free of the global nbd_server +object, but overlooked a use-after-free of nbd_server->listener. +Although this race is harder to hit, notice that our shutdown path +first drops the reference count of nbd_server->listener, then triggers +actions that can result in a pending client reaching the +nbd_blockdev_client_closed() callback, which in turn calls +qio_net_listener_set_client_func on a potentially stale object. + +If we know we don't want any more clients to connect, and have already +told the listener socket to shut down, then we should not be trying to +update the listener socket's associated function. + +Reproducer: + +> #!/usr/bin/python3 +> +> import os +> from threading import Thread +> +> def start_stop(): +> while 1: +> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start", ++"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'') +> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'') +> +> def nbd_list(): +> while 1: +> os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock') +> +> def test(): +> sst = Thread(target=start_stop) +> sst.start() +> nlt = Thread(target=nbd_list) +> nlt.start() +> +> sst.join() +> nlt.join() +> +> test() + +Fixes: CVE-2024-7409 +Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop") +CC: qemu-stable@nongnu.org +Reported-by: Andrey Drobyshev +Signed-off-by: Eric Blake +Message-ID: <20240822143617.800419-2-eblake@redhat.com> +Reviewed-by: Stefan Hajnoczi + +(cherry picked from commit 3874f5f73c441c52f1c699c848d463b0eda01e4c) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 
deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index f73409ae49..b36f41b7c5 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -92,10 +92,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + static void nbd_update_server_watch(NBDServerData *s) + { +- if (!s->max_connections || s->connections < s->max_connections) { +- qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL); +- } else { +- qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); ++ if (s->listener) { ++ if (!s->max_connections || s->connections < s->max_connections) { ++ qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, ++ NULL); ++ } else { ++ qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); ++ } + } + } + +@@ -113,6 +116,7 @@ static void nbd_server_free(NBDServerData *server) + */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ server->listener = NULL; + QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) { + qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, + NULL); +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch new file mode 100644 index 0000000..5459a51 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch @@ -0,0 +1,184 @@ +From f76d73f62555ad73081558c1f56bcb832fbb8c35 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 6 Aug 2024 13:53:00 -0500 +Subject: [PATCH 098/100] nbd/server: CVE-2024-7409: Cap default + max-connections to 100 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] 1fb3b8cd9781a66bba2f4a6bee2b320e96de86aa 
(redhat/centos-stream/src/qemu-kvm) + +Allowing an unlimited number of clients to any web service is a recipe +for a rudimentary denial of service attack: the client merely needs to +open lots of sockets without closing them, until qemu no longer has +any more fds available to allocate. + +For qemu-nbd, we default to allowing only 1 connection unless more are +explicitly asked for (-e or --shared); this was historically picked as +a nice default (without an explicit -t, a non-persistent qemu-nbd goes +away after a client disconnects, without needing any additional +follow-up commands), and we are not going to change that interface now +(besides, someday we want to point people towards qemu-storage-daemon +instead of qemu-nbd). + +But for qemu proper, and the newer qemu-storage-daemon, the QMP +nbd-server-start command has historically had a default of unlimited +number of connections, in part because unlike qemu-nbd it is +inherently persistent until nbd-server-stop. Allowing multiple client +sockets is particularly useful for clients that can take advantage of +MULTI_CONN (creating parallel sockets to increase throughput), +although known clients that do so (such as libnbd's nbdcopy) typically +use only 8 or 16 connections (the benefits of scaling diminish once +more sockets are competing for kernel attention). Picking a number +large enough for typical use cases, but not unlimited, makes it +slightly harder for a malicious client to perform a denial of service +merely by opening lots of connections withot progressing through the +handshake. + +This change does not eliminate CVE-2024-7409 on its own, but reduces +the chance for fd exhaustion or unlimited memory usage as an attack +surface. 
On the other hand, by itself, it makes it more obvious that +with a finite limit, we have the problem of an unauthenticated client +holding 100 fds opened as a way to block out a legitimate client from +being able to connect; thus, later patches will further add timeouts +to reject clients that are not making progress. + +This is an INTENTIONAL change in behavior, and will break any client +of nbd-server-start that was not passing an explicit max-connections +parameter, yet expects more than 100 simultaneous connections. We are +not aware of any such client (as stated above, most clients aware of +MULTI_CONN get by just fine on 8 or 16 connections, and probably cope +with later connections failing by relying on the earlier connections; +libvirt has not yet been passing max-connections, but generally +creates NBD servers with the intent for a single client for the sake +of live storage migration; meanwhile, the KubeSAN project anticipates +a large cluster sharing multiple clients [up to 8 per node, and up to +100 nodes in a cluster], but it currently uses qemu-nbd with an +explicit --shared=0 rather than qemu-storage-daemon with +nbd-server-start). + +We considered using a deprecation period (declare that omitting +max-parameters is deprecated, and make it mandatory in 3 releases - +then we don't need to pick an arbitrary default); that has zero risk +of breaking any apps that accidentally depended on more than 100 +connections, and where such breakage might not be noticed under unit +testing but only under the larger loads of production usage. But it +does not close the denial-of-service hole until far into the future, +and requires all apps to change to add the parameter even if 100 was +good enough. It also has a drawback that any app (like libvirt) that +is accidentally relying on an unlimited default should seriously +consider their own CVE now, at which point they are going to change to +pass explicit max-connections sooner than waiting for 3 qemu releases. 
+Finally, if our changed default breaks an app, that app can always +pass in an explicit max-parameters with a larger value. + +It is also intentional that the HMP interface to nbd-server-start is +not changed to expose max-connections (any client needing to fine-tune +things should be using QMP). + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-12-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +[ericb: Expand commit message to summarize Dan's argument for why we +break corner-case back-compat behavior without a deprecation period] +Signed-off-by: Eric Blake + +(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + block/monitor/block-hmp-cmds.c | 3 ++- + blockdev-nbd.c | 8 ++++++++ + include/block/nbd.h | 7 +++++++ + qapi/block-export.json | 4 ++-- + 4 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index d954bec6f1..bdf2eb50b6 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict) + goto exit; + } + +- nbd_server_start(addr, NULL, NULL, 0, &local_err); ++ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS, ++ &local_err); + qapi_free_SocketAddress(addr); + if (local_err != NULL) { + goto exit; +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 267a1de903..24ba5382db 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, + + void nbd_server_start_options(NbdServerOptions *arg, Error **errp) + { ++ if (!arg->has_max_connections) { ++ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz, + arg->max_connections, errp); + } +@@ -182,6 +186,10 @@ void 
qmp_nbd_server_start(SocketAddressLegacy *addr, + { + SocketAddress *addr_flat = socket_address_flatten(addr); + ++ if (!has_max_connections) { ++ max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp); + qapi_free_SocketAddress(addr_flat); + } +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 1d4d65922d..d4f8b21aec 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd; + */ + #define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 + ++/* ++ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at ++ * once; must be large enough to allow a MULTI_CONN-aware client like ++ * nbdcopy to create its typical number of 8-16 sockets. ++ */ ++#define NBD_DEFAULT_MAX_CONNECTIONS 100 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +diff --git a/qapi/block-export.json b/qapi/block-export.json +index 3919a2d5b9..f45e4fd481 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -28,7 +28,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0) ++# default: 100) + # + # Since: 4.2 + ## +@@ -63,7 +63,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0). ++# default: 100). 
+ # + # Errors: + # - if the server is already running +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch new file mode 100644 index 0000000..2ba16e5 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch @@ -0,0 +1,173 @@ +From 6522c68268f00c9c5665f8f98cf6ed1984124cf3 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 12:23:13 -0500 +Subject: [PATCH 100/100] nbd/server: CVE-2024-7409: Close stray clients at + server-stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] c00bb5a7e73446e9f071ef83e4f1576f73a17059 (redhat/centos-stream/src/qemu-kvm) + +A malicious client can attempt to connect to an NBD server, and then +intentionally delay progress in the handshake, including if it does +not know the TLS secrets. Although the previous two patches reduce +this behavior by capping the default max-connections parameter and +killing slow clients, they did not eliminate the possibility of a +client waiting to close the socket until after the QMP nbd-server-stop +command is executed, at which point qemu would SEGV when trying to +dereference the NULL nbd_server global which is no longer present. +This amounts to a denial of service attack. Worse, if another NBD +server is started before the malicious client disconnects, I cannot +rule out additional adverse effects when the old client interferes +with the connection count of the new server (although the most likely +is a crash due to an assertion failure when checking +nbd_server->connections > 0). 
+ +For environments without this patch, the CVE can be mitigated by +ensuring (such as via a firewall) that only trusted clients can +connect to an NBD server. Note that using frameworks like libvirt +that ensure that TLS is used and that nbd-server-stop is not executed +while any trusted clients are still connected will only help if there +is also no possibility for an untrusted client to open a connection +but then stall on the NBD handshake. + +Given the previous patches, it would be possible to guarantee that no +clients remain connected by having nbd-server-stop sleep for longer +than the default handshake deadline before finally freeing the global +nbd_server object, but that could make QMP non-responsive for a long +time. So instead, this patch fixes the problem by tracking all client +sockets opened while the server is running, and forcefully closing any +such sockets remaining without a completed handshake at the time of +nbd-server-stop, then waiting until the coroutines servicing those +sockets notice the state change. nbd-server-stop now has a second +AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the +blk_exp_close_all_type() that disconnects all clients that completed +handshakes), but forced socket shutdown is enough to progress the +coroutines and quickly tear down all clients before the server is +freed, thus finally fixing the CVE. + +This patch relies heavily on the fact that nbd/server.c guarantees +that it only calls nbd_blockdev_client_closed() from the main loop +(see the assertion in nbd_client_put() and the hoops used in +nbd_client_put_nonzero() to achieve that); if we did not have that +guarantee, we would also need a mutex protecting our accesses of the +list of connections to survive re-entrancy from independent iothreads.
+ +Although I did not actually try to test old builds, it looks like this +problem has existed since at least commit 862172f45c (v2.12.0, 2017) - +even back when that patch started using a QIONetListener to handle +listening on multiple sockets, nbd_server_free() was already unaware +that the nbd_blockdev_client_closed callback can be reached later by a +client thread that has not completed handshakes (and therefore the +client's socket never got added to the list closed in +nbd_export_close_all), despite that patch intentionally tearing down +the QIONetListener to prevent new clients. + +Reported-by: Alexander Ivanov +Fixes: CVE-2024-7409 +CC: qemu-stable@nongnu.org +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-14-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé + +(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++- + 1 file changed, 34 insertions(+), 1 deletion(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 24ba5382db..f73409ae49 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -21,12 +21,18 @@ + #include "io/channel-socket.h" + #include "io/net-listener.h" + ++typedef struct NBDConn { ++ QIOChannelSocket *cioc; ++ QLIST_ENTRY(NBDConn) next; ++} NBDConn; ++ + typedef struct NBDServerData { + QIONetListener *listener; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; + uint32_t max_connections; + uint32_t connections; ++ QLIST_HEAD(, NBDConn) conns; + } NBDServerData; + + static NBDServerData *nbd_server; +@@ -51,6 +57,14 @@ int nbd_server_max_connections(void) + + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + { ++ NBDConn *conn = nbd_client_owner(client); ++ ++ assert(qemu_in_main_thread() && nbd_server); ++ ++ object_unref(OBJECT(conn->cioc)); ++ QLIST_REMOVE(conn, next); ++ g_free(conn); ++ + nbd_client_put(client); + 
assert(nbd_server->connections > 0); + nbd_server->connections--; +@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + gpointer opaque) + { ++ NBDConn *conn = g_new0(NBDConn, 1); ++ ++ assert(qemu_in_main_thread() && nbd_server); + nbd_server->connections++; ++ object_ref(OBJECT(cioc)); ++ conn->cioc = cioc; ++ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next); + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); + /* TODO - expose handshake timeout as QMP option */ + nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, + nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed, NULL); ++ nbd_blockdev_client_closed, conn); + } + + static void nbd_update_server_watch(NBDServerData *s) +@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s) + + static void nbd_server_free(NBDServerData *server) + { ++ NBDConn *conn, *tmp; ++ + if (!server) { + return; + } + ++ /* ++ * Forcefully close the listener socket, and any clients that have ++ * not yet disconnected on their own. 
++ */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) { ++ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, ++ NULL); ++ } ++ ++ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0); ++ + if (server->tlscreds) { + object_unref(OBJECT(server->tlscreds)); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch new file mode 100644 index 0000000..e1755c2 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch @@ -0,0 +1,135 @@ +From ca30846351f1136d15f55717a5534ad927f7cf52 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 8 Aug 2024 16:05:08 -0500 +Subject: [PATCH 099/100] nbd/server: CVE-2024-7409: Drop non-negotiating + clients +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] 8008a1067766951d9752bcc41c2127a07fce934d (redhat/centos-stream/src/qemu-kvm) + +A client that opens a socket but does not negotiate is merely hogging +qemu's resources (an open fd and a small amount of memory); and a +malicious client that can access the port where NBD is listening can +attempt a denial of service attack by intentionally opening and +abandoning lots of unfinished connections. The previous patch put a +default bound on the number of such ongoing connections, but once that +limit is hit, no more clients can connect (including legitimate ones). +The solution is to insist that clients complete handshake within a +reasonable time limit, defaulting to 10 seconds. 
A client that has +not successfully completed NBD_OPT_GO by then (including the case +where the client didn't know TLS credentials to even reach the point +of NBD_OPT_GO) is wasting our time and does not deserve to stay +connected. Later patches will allow fine-tuning the limit away from +the default value (including disabling it for doing integration +testing of the handshake process itself). + +Note that this patch in isolation actually makes it more likely to see +qemu SEGV after nbd-server-stop, as any client socket still connected +when the server shuts down will now be closed after 10 seconds rather +than at the client's whims. That will be addressed in the next patch. + +For a demo of this patch in action: +$ qemu-nbd -f raw -r -t -e 10 file & +$ nbdsh --opt-mode -c ' +H = list() +for i in range(20): + print(i) + H.insert(i, nbd.NBD()) + H[i].set_opt_mode(True) + H[i].connect_uri("nbd://localhost") +' +$ kill $! + +where later connections get to start progressing once earlier ones are +forcefully dropped for taking too long, rather than hanging. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-13-eblake@redhat.com> +Reviewed-by: Daniel P.
Berrangé +[eblake: rebase to changes earlier in series, reduce scope of timer] +Signed-off-by: Eric Blake + +(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + nbd/server.c | 28 +++++++++++++++++++++++++++- + nbd/trace-events | 1 + + 2 files changed, 28 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index e50012499f..39285cc971 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client) + } + } + ++static void nbd_handshake_timer_cb(void *opaque) ++{ ++ QIOChannel *ioc = opaque; ++ ++ trace_nbd_handshake_timer_cb(); ++ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++} ++ + static coroutine_fn void nbd_co_client_start(void *opaque) + { + NBDClient *client = opaque; + Error *local_err = NULL; ++ QEMUTimer *handshake_timer = NULL; + + qemu_co_mutex_init(&client->send_lock); + +- /* TODO - utilize client->handshake_max_secs */ ++ /* ++ * Create a timer to bound the time spent in negotiation. If the ++ * timer expires, it is likely nbd_negotiate will fail because the ++ * socket was shutdown. 
++ */ ++ if (client->handshake_max_secs > 0) { ++ handshake_timer = aio_timer_new(qemu_get_aio_context(), ++ QEMU_CLOCK_REALTIME, ++ SCALE_NS, ++ nbd_handshake_timer_cb, ++ client->sioc); ++ timer_mod(handshake_timer, ++ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ++ client->handshake_max_secs * NANOSECONDS_PER_SECOND); ++ } ++ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); + } ++ timer_free(handshake_timer); + client_close(client, false); + return; + } + ++ timer_free(handshake_timer); + WITH_QEMU_LOCK_GUARD(&client->lock) { + nbd_client_receive_next_request(client); + } +diff --git a/nbd/trace-events b/nbd/trace-events +index 00ae3216a1..cbd0a4ab7e 100644 +--- a/nbd/trace-events ++++ b/nbd/trace-events +@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload + nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64 + nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32 + nbd_trip(void) "Reading request" ++nbd_handshake_timer_cb(void) "client took too long to negotiate" + + # client-connection.c + nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64 +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch b/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch deleted file mode 100644 index 214b6dd..0000000 --- a/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 639f65d2cd4c6627a1d22c4b418b41400fe40154 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:33 +0200 -Subject: [PATCH 03/21] nbd/server: Fix drained_poll to wake coroutine in right - AioContext - -RH-Author: Kevin Wolf 
-RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/4] 177092e61360c2feb04377890b32fdeb2d1cfefc (kmwolf/centos-qemu-kvm) - -nbd_drained_poll() generally runs in the main thread, not whatever -iothread the NBD server coroutine is meant to run in, so it can't -directly reenter the coroutines to wake them up. - -The code seems to have the right intention, it specifies the correct -AioContext when it calls qemu_aio_coroutine_enter(). However, this -functions doesn't schedule the coroutine to run in that AioContext, but -it assumes it is already called in the home thread of the AioContext. - -To fix this, add a new thread-safe qio_channel_wake_read() that can be -called in the main thread to wake up the coroutine in its AioContext, -and use this in nbd_drained_poll(). - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-3-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 7c1f51bf38de8cea4ed5030467646c37b46edeb7) -Signed-off-by: Kevin Wolf ---- - include/io/channel.h | 10 ++++++++++ - io/channel.c | 33 +++++++++++++++++++++++++++------ - nbd/server.c | 3 +-- - 3 files changed, 38 insertions(+), 8 deletions(-) - -diff --git a/include/io/channel.h b/include/io/channel.h -index 153fbd2904..2b905423a9 100644 ---- a/include/io/channel.h -+++ b/include/io/channel.h -@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc); - void coroutine_fn qio_channel_yield(QIOChannel *ioc, - GIOCondition condition); - -+/** -+ * qio_channel_wake_read: -+ * @ioc: the channel object -+ * -+ * If qio_channel_yield() is currently waiting for the channel to become -+ * readable, interrupt it and reenter immediately. This function is safe to call -+ * from any thread. 
-+ */ -+void qio_channel_wake_read(QIOChannel *ioc); -+ - /** - * qio_channel_wait: - * @ioc: the channel object -diff --git a/io/channel.c b/io/channel.c -index a8c7f11649..3c9b7beb65 100644 ---- a/io/channel.c -+++ b/io/channel.c -@@ -19,6 +19,7 @@ - */ - - #include "qemu/osdep.h" -+#include "block/aio-wait.h" - #include "io/channel.h" - #include "qapi/error.h" - #include "qemu/main-loop.h" -@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, - static void qio_channel_restart_read(void *opaque) - { - QIOChannel *ioc = opaque; -- Coroutine *co = ioc->read_coroutine; -+ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); -+ -+ if (!co) { -+ return; -+ } - - /* Assert that aio_co_wake() reenters the coroutine directly */ - assert(qemu_get_current_aio_context() == -@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) - static void qio_channel_restart_write(void *opaque) - { - QIOChannel *ioc = opaque; -- Coroutine *co = ioc->write_coroutine; -+ Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); -+ -+ if (!co) { -+ return; -+ } - - /* Assert that aio_co_wake() reenters the coroutine directly */ - assert(qemu_get_current_aio_context() == -@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) - void coroutine_fn qio_channel_yield(QIOChannel *ioc, - GIOCondition condition) - { -+ AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); -+ - assert(qemu_in_coroutine()); -+ assert(in_aio_context_home_thread(ioc_ctx)); -+ - if (condition == G_IO_IN) { - assert(!ioc->read_coroutine); - ioc->read_coroutine = qemu_coroutine_self(); -@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, - } - qio_channel_set_aio_fd_handlers(ioc); - qemu_coroutine_yield(); -+ assert(in_aio_context_home_thread(ioc_ctx)); - - /* Allow interrupting the operation by reentering the coroutine other than - * through the aio_fd_handlers. 
*/ -- if (condition == G_IO_IN && ioc->read_coroutine) { -- ioc->read_coroutine = NULL; -+ if (condition == G_IO_IN) { -+ assert(ioc->read_coroutine == NULL); - qio_channel_set_aio_fd_handlers(ioc); -- } else if (condition == G_IO_OUT && ioc->write_coroutine) { -- ioc->write_coroutine = NULL; -+ } else if (condition == G_IO_OUT) { -+ assert(ioc->write_coroutine == NULL); - qio_channel_set_aio_fd_handlers(ioc); - } - } - -+void qio_channel_wake_read(QIOChannel *ioc) -+{ -+ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); -+ if (co) { -+ aio_co_wake(co); -+ } -+} - - static gboolean qio_channel_wait_complete(QIOChannel *ioc, - GIOCondition condition, -diff --git a/nbd/server.c b/nbd/server.c -index 3d8d0d81df..ea47522e8f 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) - * enter it here so we don't depend on the client to wake it up. - */ - if (client->recv_coroutine != NULL && client->read_yielding) { -- qemu_aio_coroutine_enter(exp->common.ctx, -- client->recv_coroutine); -+ qio_channel_wake_read(client->ioc); - } - - return true; --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch b/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch new file mode 100644 index 0000000..7614df8 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch @@ -0,0 +1,330 @@ +From af6f51ad3482513e3ac047eb203f9dc623d47088 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Mon, 8 Apr 2024 11:00:44 -0500 +Subject: [PATCH 2/2] nbd/server: Mark negotiation functions as coroutine_fn + +RH-Author: Eric Blake +RH-MergeRequest: 239: avoid destination hang on NBD+TLS storage migration +RH-Jira: RHEL-33440 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] acf61b854f80365993d24bcd1d110ed3518b22fa (ebblake/centos-qemu-kvm) + +nbd_negotiate() is already marked coroutine_fn. 
And given the fix in +the previous patch to have nbd_negotiate_handle_starttls not create +and wait on a g_main_loop (as that would violate coroutine +constraints), it is worth marking the rest of the related static +functions reachable only during option negotiation as also being +coroutine_fn. + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +Message-ID: <20240408160214.1200629-6-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +[eblake: drop one spurious coroutine_fn marking] +Signed-off-by: Eric Blake + +Jira: https://issues.redhat.com/browse/RHEL-33440 +(cherry picked from commit 4fa333e08dd96395a99ea8dd9e4c73a29dd23344) +Signed-off-by: Eric Blake +--- + nbd/server.c | 102 +++++++++++++++++++++++++++++---------------------- + 1 file changed, 59 insertions(+), 43 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 98ae0e1632..892797bb11 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -195,8 +195,9 @@ static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option, + + /* Send a reply header, including length, but no payload. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, +- uint32_t len, Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, ++ uint32_t len, Error **errp) + { + NBDOptionReply rep; + +@@ -211,15 +212,15 @@ static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, + + /* Send a reply header with default 0 length. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep(NBDClient *client, uint32_t type, Error **errp) + { + return nbd_negotiate_send_rep_len(client, type, 0, errp); + } + + /* Send an error reply. + * Return -errno on error, 0 on success. 
*/ +-static int G_GNUC_PRINTF(4, 0) ++static coroutine_fn int G_GNUC_PRINTF(4, 0) + nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, + Error **errp, const char *fmt, va_list va) + { +@@ -259,7 +260,7 @@ nbd_sanitize_name(const char *name) + + /* Send an error reply. + * Return -errno on error, 0 on success. */ +-static int G_GNUC_PRINTF(4, 5) ++static coroutine_fn int G_GNUC_PRINTF(4, 5) + nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, + Error **errp, const char *fmt, ...) + { +@@ -275,7 +276,7 @@ nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, + /* Drop remainder of the current option, and send a reply with the + * given error type and message. Return -errno on read or write + * failure; or 0 if connection is still live. */ +-static int G_GNUC_PRINTF(4, 0) ++static coroutine_fn int G_GNUC_PRINTF(4, 0) + nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, + const char *fmt, va_list va) + { +@@ -288,7 +289,7 @@ nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, + return ret; + } + +-static int G_GNUC_PRINTF(4, 5) ++static coroutine_fn int G_GNUC_PRINTF(4, 5) + nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, + const char *fmt, ...) + { +@@ -302,7 +303,7 @@ nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, + return ret; + } + +-static int G_GNUC_PRINTF(3, 4) ++static coroutine_fn int G_GNUC_PRINTF(3, 4) + nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) + { + int ret; +@@ -319,8 +320,9 @@ nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) + * If @check_nul, require that no NUL bytes appear in buffer. + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. 
*/ +-static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, +- bool check_nul, Error **errp) ++static coroutine_fn int ++nbd_opt_read(NBDClient *client, void *buffer, size_t size, ++ bool check_nul, Error **errp) + { + if (size > client->optlen) { + return nbd_opt_invalid(client, errp, +@@ -343,7 +345,8 @@ static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, + /* Drop size bytes from the unparsed payload of the current option. + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +-static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) ++static coroutine_fn int ++nbd_opt_skip(NBDClient *client, size_t size, Error **errp) + { + if (size > client->optlen) { + return nbd_opt_invalid(client, errp, +@@ -366,8 +369,9 @@ static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. + */ +-static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, +- Error **errp) ++static coroutine_fn int ++nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, ++ Error **errp) + { + int ret; + uint32_t len; +@@ -402,8 +406,8 @@ static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, + + /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, Error **errp) + { + ERRP_GUARD(); + size_t name_len, desc_len; +@@ -444,7 +448,8 @@ static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, + + /* Process the NBD_OPT_LIST command, with a potential series of replies. + * Return -errno on error, 0 on success. 
*/ +-static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_list(NBDClient *client, Error **errp) + { + NBDExport *exp; + assert(client->opt == NBD_OPT_LIST); +@@ -459,7 +464,8 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) + return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); + } + +-static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) ++static coroutine_fn void ++nbd_check_meta_export(NBDClient *client, NBDExport *exp) + { + if (exp != client->contexts.exp) { + client->contexts.count = 0; +@@ -468,8 +474,9 @@ static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) + + /* Send a reply to NBD_OPT_EXPORT_NAME. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, ++ Error **errp) + { + ERRP_GUARD(); + g_autofree char *name = NULL; +@@ -536,9 +543,9 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, + /* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes. + * The buffer does NOT include the info type prefix. + * Return -errno on error, 0 if ready to send more. 
*/ +-static int nbd_negotiate_send_info(NBDClient *client, +- uint16_t info, uint32_t length, void *buf, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_info(NBDClient *client, uint16_t info, uint32_t length, ++ void *buf, Error **errp) + { + int rc; + +@@ -565,7 +572,8 @@ static int nbd_negotiate_send_info(NBDClient *client, + * -errno transmission error occurred or @fatal was requested, errp is set + * 0 error message successfully sent to client, errp is not set + */ +-static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) ++static coroutine_fn int ++nbd_reject_length(NBDClient *client, bool fatal, Error **errp) + { + int ret; + +@@ -583,7 +591,8 @@ static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) + /* Handle NBD_OPT_INFO and NBD_OPT_GO. + * Return -errno on error, 0 if ready for next option, and 1 to move + * into transmission phase. */ +-static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_info(NBDClient *client, Error **errp) + { + int rc; + g_autofree char *name = NULL; +@@ -755,7 +764,8 @@ struct NBDTLSServerHandshakeData { + Coroutine *co; + }; + +-static void nbd_server_tls_handshake(QIOTask *task, void *opaque) ++static void ++nbd_server_tls_handshake(QIOTask *task, void *opaque) + { + struct NBDTLSServerHandshakeData *data = opaque; + +@@ -768,8 +778,8 @@ static void nbd_server_tls_handshake(QIOTask *task, void *opaque) + + /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. 
*/ +-static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, +- Error **errp) ++static coroutine_fn QIOChannel * ++nbd_negotiate_handle_starttls(NBDClient *client, Error **errp) + { + QIOChannel *ioc; + QIOChannelTLS *tioc; +@@ -821,10 +831,9 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + * + * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead. + */ +-static int nbd_negotiate_send_meta_context(NBDClient *client, +- const char *context, +- uint32_t context_id, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_meta_context(NBDClient *client, const char *context, ++ uint32_t context_id, Error **errp) + { + NBDOptionReplyMetaContext opt; + struct iovec iov[] = { +@@ -849,8 +858,9 @@ static int nbd_negotiate_send_meta_context(NBDClient *client, + * Return true if @query matches @pattern, or if @query is empty when + * the @client is performing _LIST_. + */ +-static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, +- const char *query) ++static coroutine_fn bool ++nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, ++ const char *query) + { + if (!*query) { + trace_nbd_negotiate_meta_query_parse("empty"); +@@ -867,7 +877,8 @@ static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, + /* + * Return true and adjust @str in place if it begins with @prefix. + */ +-static bool nbd_strshift(const char **str, const char *prefix) ++static coroutine_fn bool ++nbd_strshift(const char **str, const char *prefix) + { + size_t len = strlen(prefix); + +@@ -883,8 +894,9 @@ static bool nbd_strshift(const char **str, const char *prefix) + * Handle queries to 'base' namespace. For now, only the base:allocation + * context is available. Return true if @query has been handled. 
+ */ +-static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, +- const char *query) ++static coroutine_fn bool ++nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, ++ const char *query) + { + if (!nbd_strshift(&query, "base:")) { + return false; +@@ -903,8 +915,9 @@ static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, + * and qemu:allocation-depth contexts are available. Return true if @query + * has been handled. + */ +-static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, +- const char *query) ++static coroutine_fn bool ++nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, ++ const char *query) + { + size_t i; + +@@ -968,8 +981,9 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +-static int nbd_negotiate_meta_query(NBDClient *client, +- NBDMetaContexts *meta, Error **errp) ++static coroutine_fn int ++nbd_negotiate_meta_query(NBDClient *client, ++ NBDMetaContexts *meta, Error **errp) + { + int ret; + g_autofree char *query = NULL; +@@ -1008,7 +1022,8 @@ static int nbd_negotiate_meta_query(NBDClient *client, + * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT + * + * Return -errno on I/O error, or 0 if option was completely handled. */ +-static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_meta_queries(NBDClient *client, Error **errp) + { + int ret; + g_autofree char *export_name = NULL; +@@ -1136,7 +1151,8 @@ static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp) + * 1 if client sent NBD_OPT_ABORT, i.e. 
on valid disconnect, + * errp is not set + */ +-static int nbd_negotiate_options(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_options(NBDClient *client, Error **errp) + { + uint32_t flags; + bool fixedNewstyle = false; +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch b/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch new file mode 100644 index 0000000..6b4c670 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch @@ -0,0 +1,175 @@ +From 70acef52a99e5114699f5fa58de5f0b5c031b880 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 08:50:01 -0500 +Subject: [PATCH 097/100] nbd/server: Plumb in new args to nbd_client_add() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 7614e294e1f5b7861386950ae994bea166d19950 (redhat/centos-stream/src/qemu-kvm) + +Upcoming patches to fix a CVE need to track an opaque pointer passed +in by the owner of a client object, as well as request for a time +limit on how fast negotiation must complete. Prepare for that by +changing the signature of nbd_client_new() and adding an accessor to +get at the opaque pointer, although for now the two servers +(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though +they pass in a new default timeout value. + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-11-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan] +Signed-off-by: Eric Blake + +(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 6 ++++-- + include/block/nbd.h | 11 ++++++++++- + nbd/server.c | 20 +++++++++++++++++--- + qemu-nbd.c | 4 +++- + 4 files changed, 34 insertions(+), 7 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 213012435f..267a1de903 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); +- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed); ++ /* TODO - expose handshake timeout as QMP option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ nbd_server->tlscreds, nbd_server->tlsauthz, ++ nbd_blockdev_client_closed, NULL); + } + + static void nbd_update_server_watch(NBDServerData *s) +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 4e7bd6342f..1d4d65922d 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts; + + extern const BlockExportDriver blk_exp_nbd; + ++/* ++ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must ++ * succeed at NBD_OPT_GO before being forcefully dropped as too slow. 
++ */ ++#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp); + NBDExport *nbd_export_find(const char *name); + + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)); ++ void (*close_fn)(NBDClient *, bool), ++ void *owner); ++void *nbd_client_owner(NBDClient *client); + void nbd_client_get(NBDClient *client); + void nbd_client_put(NBDClient *client); + +diff --git a/nbd/server.c b/nbd/server.c +index 892797bb11..e50012499f 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -124,12 +124,14 @@ struct NBDMetaContexts { + struct NBDClient { + int refcount; /* atomic */ + void (*close_fn)(NBDClient *client, bool negotiated); ++ void *owner; + + QemuMutex lock; + + NBDExport *exp; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; ++ uint32_t handshake_max_secs; + QIOChannelSocket *sioc; /* The underlying data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + +@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + + qemu_co_mutex_init(&client->send_lock); + ++ /* TODO - utilize client->handshake_max_secs */ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); +@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + } + + /* +- * Create a new client listener using the given channel @sioc. ++ * Create a new client listener using the given channel @sioc and @owner. + * Begin servicing it in a coroutine. When the connection closes, call +- * @close_fn with an indication of whether the client completed negotiation. ++ * @close_fn with an indication of whether the client completed negotiation ++ * within @handshake_max_secs seconds (0 for unbounded). 
+ */ + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)) ++ void (*close_fn)(NBDClient *, bool), ++ void *owner) + { + NBDClient *client; + Coroutine *co; +@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc, + object_ref(OBJECT(client->tlscreds)); + } + client->tlsauthz = g_strdup(tlsauthz); ++ client->handshake_max_secs = handshake_max_secs; + client->sioc = sioc; + qio_channel_set_delay(QIO_CHANNEL(sioc), false); + object_ref(OBJECT(client->sioc)); + client->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(client->ioc)); + client->close_fn = close_fn; ++ client->owner = owner; + + co = qemu_coroutine_create(nbd_co_client_start, client); + qemu_coroutine_enter(co); + } ++ ++void * ++nbd_client_owner(NBDClient *client) ++{ ++ return client->owner; ++} +diff --git a/qemu-nbd.c b/qemu-nbd.c +index d7b3ccab21..48e2fa5858 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + nb_fds++; + nbd_update_server_watch(); +- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed); ++ /* TODO - expose handshake timeout as command line option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ tlscreds, tlsauthz, nbd_client_closed, NULL); + } + + static void nbd_update_server_watch(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch b/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch new file mode 100644 index 0000000..9f65e99 --- /dev/null +++ b/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch @@ -0,0 +1,208 @@ +From ae9ebfc4ebd6f146951f04cf9e12eeaaf4c2387e Mon Sep 17 00:00:00 2001 +From: Zhu Yangyang +Date: Mon, 8 Apr 2024 11:00:43 -0500 +Subject: [PATCH 1/2] nbd/server: do not poll within a coroutine context + +RH-Author: Eric Blake +RH-MergeRequest: 239: avoid 
destination hang on NBD+TLS storage migration +RH-Jira: RHEL-33440 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] 27963e92aacca8ed43994113b645f472fba8f8bc (ebblake/centos-qemu-kvm) + +Coroutines are not supposed to block. Instead, they should yield. + +The client performs TLS upgrade outside of an AIOContext, during +synchronous handshake; this still requires g_main_loop. But the +server responds to TLS upgrade inside a coroutine, so a nested +g_main_loop is wrong. Since the two callbacks no longer share more +than the setting of data.complete and data.error, it's just as easy to +use static helpers instead of trying to share a common code path. It +is also possible to add assertions that no other code is interfering +with the eventual path to qio reaching the callback, whether or not it +required a yield or main loop. + +Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation") +Signed-off-by: Zhu Yangyang +[eblake: move callbacks to their use point, add assertions] +Signed-off-by: Eric Blake +Message-ID: <20240408160214.1200629-5-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy + +Jira: https://issues.redhat.com/browse/RHEL-33440 +(cherry picked from commit ae6d91a7e9b77abb029ed3fa9fad461422286942) +Signed-off-by: Eric Blake +--- + nbd/client.c | 28 ++++++++++++++++++++++++---- + nbd/common.c | 11 ----------- + nbd/nbd-internal.h | 10 ---------- + nbd/server.c | 28 +++++++++++++++++++++++----- + 4 files changed, 47 insertions(+), 30 deletions(-) + +diff --git a/nbd/client.c b/nbd/client.c +index 29ffc609a4..c89c750467 100644 +--- a/nbd/client.c ++++ b/nbd/client.c +@@ -596,13 +596,31 @@ static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict, + return 1; + } + ++/* Callback to learn when QIO TLS upgrade is complete */ ++struct NBDTLSClientHandshakeData { ++ bool complete; ++ Error *error; ++ GMainLoop *loop; ++}; ++ ++static void nbd_client_tls_handshake(QIOTask *task, void *opaque) 
++{ ++ struct NBDTLSClientHandshakeData *data = opaque; ++ ++ qio_task_propagate_error(task, &data->error); ++ data->complete = true; ++ if (data->loop) { ++ g_main_loop_quit(data->loop); ++ } ++} ++ + static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, + QCryptoTLSCreds *tlscreds, + const char *hostname, Error **errp) + { + int ret; + QIOChannelTLS *tioc; +- struct NBDTLSHandshakeData data = { 0 }; ++ struct NBDTLSClientHandshakeData data = { 0 }; + + ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp); + if (ret <= 0) { +@@ -619,18 +637,20 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, + return NULL; + } + qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls"); +- data.loop = g_main_loop_new(g_main_context_default(), FALSE); + trace_nbd_receive_starttls_tls_handshake(); + qio_channel_tls_handshake(tioc, +- nbd_tls_handshake, ++ nbd_client_tls_handshake, + &data, + NULL, + NULL); + + if (!data.complete) { ++ data.loop = g_main_loop_new(g_main_context_default(), FALSE); + g_main_loop_run(data.loop); ++ assert(data.complete); ++ g_main_loop_unref(data.loop); + } +- g_main_loop_unref(data.loop); ++ + if (data.error) { + error_propagate(errp, data.error); + object_unref(OBJECT(tioc)); +diff --git a/nbd/common.c b/nbd/common.c +index 3247c1d618..589a748cfe 100644 +--- a/nbd/common.c ++++ b/nbd/common.c +@@ -47,17 +47,6 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp) + } + + +-void nbd_tls_handshake(QIOTask *task, +- void *opaque) +-{ +- struct NBDTLSHandshakeData *data = opaque; +- +- qio_task_propagate_error(task, &data->error); +- data->complete = true; +- g_main_loop_quit(data->loop); +-} +- +- + const char *nbd_opt_lookup(uint32_t opt) + { + switch (opt) { +diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h +index dfa02f77ee..91895106a9 100644 +--- a/nbd/nbd-internal.h ++++ b/nbd/nbd-internal.h +@@ -72,16 +72,6 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size, + return 
qio_channel_write_all(ioc, buffer, size, errp) < 0 ? -EIO : 0; + } + +-struct NBDTLSHandshakeData { +- GMainLoop *loop; +- bool complete; +- Error *error; +-}; +- +- +-void nbd_tls_handshake(QIOTask *task, +- void *opaque); +- + int nbd_drop(QIOChannel *ioc, size_t size, Error **errp); + + #endif +diff --git a/nbd/server.c b/nbd/server.c +index c3484cc1eb..98ae0e1632 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -748,6 +748,23 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) + return rc; + } + ++/* Callback to learn when QIO TLS upgrade is complete */ ++struct NBDTLSServerHandshakeData { ++ bool complete; ++ Error *error; ++ Coroutine *co; ++}; ++ ++static void nbd_server_tls_handshake(QIOTask *task, void *opaque) ++{ ++ struct NBDTLSServerHandshakeData *data = opaque; ++ ++ qio_task_propagate_error(task, &data->error); ++ data->complete = true; ++ if (!qemu_coroutine_entered(data->co)) { ++ aio_co_wake(data->co); ++ } ++} + + /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. 
*/ +@@ -756,7 +773,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + { + QIOChannel *ioc; + QIOChannelTLS *tioc; +- struct NBDTLSHandshakeData data = { 0 }; ++ struct NBDTLSServerHandshakeData data = { 0 }; + + assert(client->opt == NBD_OPT_STARTTLS); + +@@ -777,17 +794,18 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + + qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls"); + trace_nbd_negotiate_handle_starttls_handshake(); +- data.loop = g_main_loop_new(g_main_context_default(), FALSE); ++ data.co = qemu_coroutine_self(); + qio_channel_tls_handshake(tioc, +- nbd_tls_handshake, ++ nbd_server_tls_handshake, + &data, + NULL, + NULL); + + if (!data.complete) { +- g_main_loop_run(data.loop); ++ qemu_coroutine_yield(); ++ assert(data.complete); + } +- g_main_loop_unref(data.loop); ++ + if (data.error) { + object_unref(OBJECT(tioc)); + error_propagate(errp, data.error); +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch b/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch deleted file mode 100644 index 20b9c04..0000000 --- a/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch +++ /dev/null @@ -1,78 +0,0 @@ -From d6b3f9e4b388b8d621761104ddf075d6087f6d6c Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 9 Jun 2023 09:27:47 +0200 -Subject: [PATCH 09/12] net: socket: move fd type checking to its own function - -RH-Author: Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [2/3] 9726f0ae81ac209b5db33dc7767f652867d8ca0a (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 23455ae341656ca867ee4a171826b9d280d6acb5) ---- - 
net/socket.c | 28 ++++++++++++++++++++-------- - 1 file changed, 20 insertions(+), 8 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index 24dcaa55bc..6b1f0fec3a 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -446,16 +446,32 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, - return s; - } - -+static int net_socket_fd_check(int fd, Error **errp) -+{ -+ int so_type, optlen = sizeof(so_type); -+ -+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, -+ (socklen_t *)&optlen) < 0) { -+ error_setg(errp, "can't get socket option SO_TYPE"); -+ return -1; -+ } -+ if (so_type != SOCK_DGRAM && so_type != SOCK_STREAM) { -+ error_setg(errp, "socket type=%d for fd=%d must be either" -+ " SOCK_DGRAM or SOCK_STREAM", so_type, fd); -+ return -1; -+ } -+ return so_type; -+} -+ - static NetSocketState *net_socket_fd_init(NetClientState *peer, - const char *model, const char *name, - int fd, int is_connected, - const char *mc, Error **errp) - { -- int so_type = -1, optlen=sizeof(so_type); -+ int so_type; - -- if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, -- (socklen_t *)&optlen)< 0) { -- error_setg(errp, "can't get socket option SO_TYPE"); -+ so_type = net_socket_fd_check(fd, errp); -+ if (so_type < 0) { - close(fd); - return NULL; - } -@@ -465,10 +481,6 @@ static NetSocketState *net_socket_fd_init(NetClientState *peer, - mc, errp); - case SOCK_STREAM: - return net_socket_fd_init_stream(peer, model, name, fd, is_connected); -- default: -- error_setg(errp, "socket type=%d for fd=%d must be either" -- " SOCK_DGRAM or SOCK_STREAM", so_type, fd); -- close(fd); - } - return NULL; - } --- -2.39.3 - diff --git a/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch b/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch deleted file mode 100644 index 269da29..0000000 --- a/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch +++ /dev/null @@ -1,60 +0,0 @@ -From a467540e49e76c5961d86e3f47d3f8fcad8cef09 Mon 
Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 9 Jun 2023 09:27:46 +0200 -Subject: [PATCH 08/12] net: socket: prepare to cleanup net_init_socket() - -RH-Author: Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [1/3] 3e4f8370586ae1ac2474fef971a239edb31eeb67 (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Use directly net_socket_fd_init_stream() and net_socket_fd_init_dgram() -when the socket type is already known. - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 006c3fa74c3edb978ff46d2851699e9a95609da5) ---- - net/socket.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index ba6e5b0b00..24dcaa55bc 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -587,7 +587,7 @@ static int net_socket_connect_init(NetClientState *peer, - break; - } - } -- s = net_socket_fd_init(peer, model, name, fd, connected, NULL, errp); -+ s = net_socket_fd_init_stream(peer, model, name, fd, connected); - if (!s) { - return -1; - } -@@ -629,7 +629,7 @@ static int net_socket_mcast_init(NetClientState *peer, - return -1; - } - -- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); -+ s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); - if (!s) { - return -1; - } -@@ -683,7 +683,7 @@ static int net_socket_udp_init(NetClientState *peer, - } - qemu_socket_set_nonblock(fd); - -- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); -+ s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); - if (!s) { - return -1; - } --- -2.39.3 - diff --git a/SOURCES/kvm-net-socket-remove-net_init_socket.patch b/SOURCES/kvm-net-socket-remove-net_init_socket.patch deleted file mode 100644 index 98c96f2..0000000 --- 
a/SOURCES/kvm-net-socket-remove-net_init_socket.patch +++ /dev/null @@ -1,102 +0,0 @@ -From ecb4f97895849c562112b76a30ddc2037e8df79e Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 9 Jun 2023 09:27:48 +0200 -Subject: [PATCH 10/12] net: socket: remove net_init_socket() - -RH-Author: Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [3/3] e1d7939f5df4a77c2fff62d1ae4899a7a3615ad9 (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Move the file descriptor type checking before doing anything with it. -If it's not usable, don't close it as it could be in use by another -part of QEMU, only fail and report an error. - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit b6aeee02980e193f744f74c48fd900940feb2799) ---- - net/socket.c | 43 +++++++++++++++++-------------------------- - 1 file changed, 17 insertions(+), 26 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index 6b1f0fec3a..8e3702e1f3 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -463,28 +463,6 @@ static int net_socket_fd_check(int fd, Error **errp) - return so_type; - } - --static NetSocketState *net_socket_fd_init(NetClientState *peer, -- const char *model, const char *name, -- int fd, int is_connected, -- const char *mc, Error **errp) --{ -- int so_type; -- -- so_type = net_socket_fd_check(fd, errp); -- if (so_type < 0) { -- close(fd); -- return NULL; -- } -- switch(so_type) { -- case SOCK_DGRAM: -- return net_socket_fd_init_dgram(peer, model, name, fd, is_connected, -- mc, errp); -- case SOCK_STREAM: -- return net_socket_fd_init_stream(peer, model, name, fd, is_connected); -- } -- return NULL; --} -- - static void net_socket_accept(void *opaque) - { - NetSocketState *s = opaque; -@@ -728,21 +706,34 @@ int net_init_socket(const 
Netdev *netdev, const char *name, - } - - if (sock->fd) { -- int fd, ret; -+ int fd, ret, so_type; - - fd = monitor_fd_param(monitor_cur(), sock->fd, errp); - if (fd == -1) { - return -1; - } -+ so_type = net_socket_fd_check(fd, errp); -+ if (so_type < 0) { -+ return -1; -+ } - ret = qemu_socket_try_set_nonblock(fd); - if (ret < 0) { - error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", - name, fd); - return -1; - } -- if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast, -- errp)) { -- return -1; -+ switch (so_type) { -+ case SOCK_DGRAM: -+ if (!net_socket_fd_init_dgram(peer, "socket", name, fd, 1, -+ sock->mcast, errp)) { -+ return -1; -+ } -+ break; -+ case SOCK_STREAM: -+ if (!net_socket_fd_init_stream(peer, "socket", name, fd, 1)) { -+ return -1; -+ } -+ break; - } - return 0; - } --- -2.39.3 - diff --git a/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch b/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch deleted file mode 100644 index 66d68f1..0000000 --- a/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 760a2f284f6d4cd3cd3b1685411bbca21c4ad233 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 1/6] numa: Validate cluster and NUMA node boundary if required -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [1/3] 24580064b9a0076ec4d9a916839d85135ac48cd9 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 - -For some architectures like ARM64, multiple CPUs in one cluster can be -associated with different NUMA nodes, which is irregular configuration -because we shouldn't have this in baremetal environment. 
The irregular -configuration causes Linux guest to misbehave, as the following warning -messages indicate. - - -smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \ - -numa node,nodeid=0,cpus=0-1,memdev=ram0 \ - -numa node,nodeid=1,cpus=2-3,memdev=ram1 \ - -numa node,nodeid=2,cpus=4-5,memdev=ram2 \ - - ------------[ cut here ]------------ - WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910 - Modules linked in: - CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1 - pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) - pc : build_sched_domains+0x284/0x910 - lr : build_sched_domains+0x184/0x910 - sp : ffff80000804bd50 - x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000 - x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840 - x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508 - x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014 - x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e - x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0 - x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041 - x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001 - x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002 - x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001 - Call trace: - build_sched_domains+0x284/0x910 - sched_init_domains+0xac/0xe0 - sched_init_smp+0x48/0xc8 - kernel_init_freeable+0x140/0x1ac - kernel_init+0x28/0x140 - ret_from_fork+0x10/0x20 - -Improve the situation to warn when multiple CPUs in one cluster have -been associated with different NUMA nodes. However, one NUMA node is -allowed to be associated with different clusters. 
- -Signed-off-by: Gavin Shan -Acked-by: Philippe Mathieu-Daudé -Acked-by: Igor Mammedov -Message-Id: <20230509002739.18388-2-gshan@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit a494fdb715832000ee9047a549a35aacfea8175e) -Signed-off-by: Gavin Shan ---- - hw/core/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++ - include/hw/boards.h | 1 + - 2 files changed, 43 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index c28702b690..5abdc8c39b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -1496,6 +1496,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine) - g_string_free(s, true); - } - -+static void validate_cpu_cluster_to_numa_boundary(MachineState *ms) -+{ -+ MachineClass *mc = MACHINE_GET_CLASS(ms); -+ NumaState *state = ms->numa_state; -+ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); -+ const CPUArchId *cpus = possible_cpus->cpus; -+ int i, j; -+ -+ if (state->num_nodes <= 1 || possible_cpus->len <= 1) { -+ return; -+ } -+ -+ /* -+ * The Linux scheduling domain can't be parsed when the multiple CPUs -+ * in one cluster have been associated with different NUMA nodes. However, -+ * it's fine to associate one NUMA node with CPUs in different clusters. -+ */ -+ for (i = 0; i < possible_cpus->len; i++) { -+ for (j = i + 1; j < possible_cpus->len; j++) { -+ if (cpus[i].props.has_socket_id && -+ cpus[i].props.has_cluster_id && -+ cpus[i].props.has_node_id && -+ cpus[j].props.has_socket_id && -+ cpus[j].props.has_cluster_id && -+ cpus[j].props.has_node_id && -+ cpus[i].props.socket_id == cpus[j].props.socket_id && -+ cpus[i].props.cluster_id == cpus[j].props.cluster_id && -+ cpus[i].props.node_id != cpus[j].props.node_id) { -+ warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64 -+ " have been associated with node-%" PRId64 " and node-%" PRId64 -+ " respectively. 
It can cause OSes like Linux to" -+ " misbehave", i, j, cpus[i].props.socket_id, -+ cpus[i].props.cluster_id, cpus[i].props.node_id, -+ cpus[j].props.node_id); -+ } -+ } -+ } -+} -+ - MemoryRegion *machine_consume_memdev(MachineState *machine, - HostMemoryBackend *backend) - { -@@ -1581,6 +1620,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * - numa_complete_configuration(machine); - if (machine->numa_state->num_nodes) { - machine_numa_finish_cpu_init(machine); -+ if (machine_class->cpu_cluster_has_numa_boundary) { -+ validate_cpu_cluster_to_numa_boundary(machine); -+ } - } - } - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 5f08bd7550..3628671228 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -275,6 +275,7 @@ struct MachineClass { - bool nvdimm_supported; - bool numa_mem_supported; - bool auto_enable_numa; -+ bool cpu_cluster_has_numa_boundary; - SMPCompatProps smp_props; - const char *default_ram_id; - --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch b/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch deleted file mode 100644 index 312af68..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 7495a51c586818925470fb247882f5ba0f7b0ffd Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:03 +0200 -Subject: [PATCH 34/37] pc-bios/s390-ccw: Don't use __bss_start with the "larl" - instruction -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/4] 2483a50c0ed37fa29db649ec44220ac83c215698 (thuth/qemu-kvm-cs9) - -start.S currently cannot be compiled with Clang 16 and 
binutils 2.40: - - ld: start.o(.text+0x8): misaligned symbol `__bss_start' (0xc1e5) for - relocation R_390_PC32DBL - -According to the built-in linker script of ld, the symbol __bss_start -can actually point *before* the .bss section and does not need to have -any alignment, so in certain situations (like when using the internal -assembler of Clang), the __bss_start symbol can indeed be unaligned -and thus it is not suitable for being used with the "larl" instruction -that needs an address that is at least aligned to halfwords. -The problem went unnoticed so far since binutils <= 2.39 did not -check the alignment, but starting with binutils 2.40, such unaligned -addresses are now refused. - -Fix it by loading the address indirectly instead. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2216662 -Reported-by: Miroslav Rezanina -Suggested-by: Andreas Krebbel -Message-Id: <20230629104821.194859-8-thuth@redhat.com> -Reviewed-by: Claudio Imbrenda -Signed-off-by: Thomas Huth -(cherry picked from commit 7cd50cbe4ca3e2860b31b06ec92c17c54bd82d48) ---- - pc-bios/s390-ccw/start.S | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index abd6fe6639..22c1c296df 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -19,7 +19,8 @@ _start: - larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ - - /* clear bss */ -- larl %r2,__bss_start -+ larl %r2,bss_start_literal /* __bss_start might be unaligned ... */ -+ lg %r2,0(%r2) /* ... so load it indirectly */ - larl %r3,_end - slgr %r3,%r2 /* get sizeof bss */ - ltgr %r3,%r3 /* bss empty? 
*/ -@@ -45,7 +46,6 @@ done: - memsetxc: - xc 0(1,%r1),0(%r1) - -- - /* - * void disabled_wait(void) - * -@@ -113,6 +113,8 @@ io_new_code: - br %r14 - - .align 8 -+bss_start_literal: -+ .quad __bss_start - disabled_wait_psw: - .quad 0x0002000180000000,0x0000000000000000 - enabled_wait_psw: --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch b/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch deleted file mode 100644 index bd13187..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch +++ /dev/null @@ -1,218 +0,0 @@ -From 24bc8fc932ae1c88cc2e97f0f90786a7be411bb2 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:00 +0200 -Subject: [PATCH 32/37] pc-bios/s390-ccw: Fix indentation in start.S -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/4] cf8fa053602ce1cfac0b6efa67f491688d4f9348 (thuth/qemu-kvm-cs9) - -start.S is currently indented with a mixture of spaces and tabs, which -is quite ugly. QEMU coding style says indentation should be 4 spaces, -and this is also what we are using in the assembler files in the -tests/tcg/s390x/ folder already, so let's adjust start.S accordingly. 
- -Reviewed-by: Cédric Le Goater -Message-Id: <20230627074703.99608-2-thuth@redhat.com> -Reviewed-by: Claudio Imbrenda -Reviewed-by: Eric Farman -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth -(cherry picked from commit f52420fa4fd9f519dc42c20d2616aba4149adc25) ---- - pc-bios/s390-ccw/start.S | 136 +++++++++++++++++++-------------------- - 1 file changed, 68 insertions(+), 68 deletions(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index 6072906df4..d29de09cc6 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -10,37 +10,37 @@ - * directory. - */ - -- .globl _start -+ .globl _start - _start: - -- larl %r15, stack + 0x8000 /* Set up stack */ -+ larl %r15,stack + 0x8000 /* Set up stack */ - -- /* clear bss */ -- larl %r2, __bss_start -- larl %r3, _end -- slgr %r3, %r2 /* get sizeof bss */ -- ltgr %r3,%r3 /* bss empty? */ -- jz done -- aghi %r3,-1 -- srlg %r4,%r3,8 /* how many 256 byte chunks? */ -- ltgr %r4,%r4 -- lgr %r1,%r2 -- jz remainder -+ /* clear bss */ -+ larl %r2,__bss_start -+ larl %r3,_end -+ slgr %r3,%r2 /* get sizeof bss */ -+ ltgr %r3,%r3 /* bss empty? */ -+ jz done -+ aghi %r3,-1 -+ srlg %r4,%r3,8 /* how many 256 byte chunks? */ -+ ltgr %r4,%r4 -+ lgr %r1,%r2 -+ jz remainder - loop: -- xc 0(256,%r1),0(%r1) -- la %r1,256(%r1) -- brctg %r4,loop -+ xc 0(256,%r1),0(%r1) -+ la %r1,256(%r1) -+ brctg %r4,loop - remainder: -- larl %r2,memsetxc -- ex %r3,0(%r2) -+ larl %r2,memsetxc -+ ex %r3,0(%r2) - done: -- /* set up a pgm exception disabled wait psw */ -- larl %r2, disabled_wait_psw -- mvc 0x01d0(16), 0(%r2) -- j main /* And call C */ -+ /* set up a pgm exception disabled wait psw */ -+ larl %r2,disabled_wait_psw -+ mvc 0x01d0(16),0(%r2) -+ j main /* And call C */ - - memsetxc: -- xc 0(1,%r1),0(%r1) -+ xc 0(1,%r1),0(%r1) - - - /* -@@ -48,11 +48,11 @@ memsetxc: - * - * stops the current guest cpu. 
- */ -- .globl disabled_wait -+ .globl disabled_wait - disabled_wait: -- larl %r1,disabled_wait_psw -- lpswe 0(%r1) --1: j 1b -+ larl %r1,disabled_wait_psw -+ lpswe 0(%r1) -+1: j 1b - - - /* -@@ -60,61 +60,61 @@ disabled_wait: - * - * eats one sclp interrupt - */ -- .globl consume_sclp_int -+ .globl consume_sclp_int - consume_sclp_int: -- /* enable service interrupts in cr0 */ -- stctg %c0,%c0,0(%r15) -- oi 6(%r15),0x2 -- lctlg %c0,%c0,0(%r15) -- /* prepare external call handler */ -- larl %r1, external_new_code -- stg %r1, 0x1b8 -- larl %r1, external_new_mask -- mvc 0x1b0(8),0(%r1) -- /* load enabled wait PSW */ -- larl %r1, enabled_wait_psw -- lpswe 0(%r1) -+ /* enable service interrupts in cr0 */ -+ stctg %c0,%c0,0(%r15) -+ oi 6(%r15),0x2 -+ lctlg %c0,%c0,0(%r15) -+ /* prepare external call handler */ -+ larl %r1,external_new_code -+ stg %r1,0x1b8 -+ larl %r1,external_new_mask -+ mvc 0x1b0(8),0(%r1) -+ /* load enabled wait PSW */ -+ larl %r1,enabled_wait_psw -+ lpswe 0(%r1) - - /* - * void consume_io_int(void) - * - * eats one I/O interrupt - */ -- .globl consume_io_int -+ .globl consume_io_int - consume_io_int: -- /* enable I/O interrupts in cr6 */ -- stctg %c6,%c6,0(%r15) -- oi 4(%r15), 0xff -- lctlg %c6,%c6,0(%r15) -- /* prepare i/o call handler */ -- larl %r1, io_new_code -- stg %r1, 0x1f8 -- larl %r1, io_new_mask -- mvc 0x1f0(8),0(%r1) -- /* load enabled wait PSW */ -- larl %r1, enabled_wait_psw -- lpswe 0(%r1) -+ /* enable I/O interrupts in cr6 */ -+ stctg %c6,%c6,0(%r15) -+ oi 4(%r15), 0xff -+ lctlg %c6,%c6,0(%r15) -+ /* prepare i/o call handler */ -+ larl %r1,io_new_code -+ stg %r1,0x1f8 -+ larl %r1,io_new_mask -+ mvc 0x1f0(8),0(%r1) -+ /* load enabled wait PSW */ -+ larl %r1,enabled_wait_psw -+ lpswe 0(%r1) - - external_new_code: -- /* disable service interrupts in cr0 */ -- stctg %c0,%c0,0(%r15) -- ni 6(%r15),0xfd -- lctlg %c0,%c0,0(%r15) -- br %r14 -+ /* disable service interrupts in cr0 */ -+ stctg %c0,%c0,0(%r15) -+ ni 6(%r15),0xfd -+ lctlg 
%c0,%c0,0(%r15) -+ br %r14 - - io_new_code: -- /* disable I/O interrupts in cr6 */ -- stctg %c6,%c6,0(%r15) -- ni 4(%r15), 0x00 -- lctlg %c6,%c6,0(%r15) -- br %r14 -+ /* disable I/O interrupts in cr6 */ -+ stctg %c6,%c6,0(%r15) -+ ni 4(%r15),0x00 -+ lctlg %c6,%c6,0(%r15) -+ br %r14 - -- .align 8 -+ .align 8 - disabled_wait_psw: -- .quad 0x0002000180000000,0x0000000000000000 -+ .quad 0x0002000180000000,0x0000000000000000 - enabled_wait_psw: -- .quad 0x0302000180000000,0x0000000000000000 -+ .quad 0x0302000180000000,0x0000000000000000 - external_new_mask: -- .quad 0x0000000180000000 -+ .quad 0x0000000180000000 - io_new_mask: -- .quad 0x0000000180000000 -+ .quad 0x0000000180000000 --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch b/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch deleted file mode 100644 index 907fe43..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b5b243cbbb897b236c08699529e13457e1e49924 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 22 Jun 2023 15:08:22 +0200 -Subject: [PATCH 31/37] pc-bios/s390-ccw/Makefile: Use -z noexecstack to - silence linker warning -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/4] 04f6f83169f1c5545a0e2772b4babfc6a50bd5bf (thuth/qemu-kvm-cs9) - -Recent versions of ld complain when linking the s390-ccw bios: - - /usr/bin/ld: warning: start.o: missing .note.GNU-stack section implies - executable stack - /usr/bin/ld: NOTE: This behaviour is deprecated and will be removed in - a future version of the linker - -We can silence the warning by telling the linker to mark the stack -as not executable. 
- -Message-Id: <20230622130822.396793-1-thuth@redhat.com> -Acked-by: Christian Borntraeger -Signed-off-by: Thomas Huth -(cherry picked from commit 442ef32ee5b6059a8f247fb2def9d449578d0a89) ---- - pc-bios/s390-ccw/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile -index 10e8f5cb63..2a590af4a9 100644 ---- a/pc-bios/s390-ccw/Makefile -+++ b/pc-bios/s390-ccw/Makefile -@@ -53,7 +53,7 @@ config-cc.mak: Makefile - $(call cc-option,-march=z900,-march=z10)) 3> config-cc.mak - -include config-cc.mak - --LDFLAGS += -Wl,-pie -nostdlib -+LDFLAGS += -Wl,-pie -nostdlib -z noexecstack - - build-all: s390-ccw.img s390-netboot.img - --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch b/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch deleted file mode 100644 index 0c4ce6f..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 2c52aebf90f28121a3e46a9305304406023b9747 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:01 +0200 -Subject: [PATCH 33/37] pc-bios/s390-ccw: Provide space for initial stack frame - in start.S -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/4] c2f69ce5998861fe20b799bf0113def8cf0cd128 (thuth/qemu-kvm-cs9) - -Providing the space of a stack frame is the duty of the caller, -so we should reserve 160 bytes before jumping into the main function. -Otherwise the main() function might write past the stack array. 
- -While we're at it, add a proper STACK_SIZE macro for the stack size -instead of using magic numbers (this is also required for the following -patch). - -Reviewed-by: Christian Borntraeger -Reviewed-by: Cédric Le Goater -Message-Id: <20230627074703.99608-3-thuth@redhat.com> -Reviewed-by: Eric Farman -Reviewed-by: Claudio Imbrenda -Reviewed-by: Marc Hartmayer -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth -(cherry picked from commit 74fe98ee7fb3344dbd085d1fa32c0dc2fc2c831f) ---- - pc-bios/s390-ccw/start.S | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index d29de09cc6..abd6fe6639 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -10,10 +10,13 @@ - * directory. - */ - -+#define STACK_SIZE 0x8000 -+#define STACK_FRAME_SIZE 160 -+ - .globl _start - _start: - -- larl %r15,stack + 0x8000 /* Set up stack */ -+ larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ - - /* clear bss */ - larl %r2,__bss_start --- -2.39.3 - diff --git a/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch b/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch new file mode 100644 index 0000000..2c21dde --- /dev/null +++ b/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch @@ -0,0 +1,68 @@ +From c0a65c752cd83dea27cbeb34074d65fb2c5a6b59 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Wed, 20 Mar 2024 03:39:13 -0500 +Subject: [PATCH 008/100] pci-host/q35: Move PAM initialization above SMRAM + initialization + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [8/91] 22a9221d4726e872aa0f0dc25ae9d823c0611547 (bonzini/rhel-qemu-kvm) + +In mch_realize(), process PAM initialization before SMRAM initialization so +that later patch can skill all the SMRAM related with a single 
check. + +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-18-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 42c11ae2416dcbcd694ec3ee574fe2f3e70099ae) +Signed-off-by: Paolo Bonzini +--- + hw/pci-host/q35.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c +index 0d7d4e3f08..98d4a7c253 100644 +--- a/hw/pci-host/q35.c ++++ b/hw/pci-host/q35.c +@@ -568,6 +568,16 @@ static void mch_realize(PCIDevice *d, Error **errp) + /* setup pci memory mapping */ + pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space); + ++ /* PAM */ ++ init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, ++ mch->system_memory, mch->pci_address_space, ++ PAM_BIOS_BASE, PAM_BIOS_SIZE); ++ for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { ++ init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, ++ mch->system_memory, mch->pci_address_space, ++ PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); ++ } ++ + /* if *disabled* show SMRAM to all CPUs */ + memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", + mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, +@@ -634,15 +644,6 @@ static void mch_realize(PCIDevice *d, Error **errp) + + object_property_add_const_link(qdev_get_machine(), "smram", + OBJECT(&mch->smram)); +- +- init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, +- mch->system_memory, mch->pci_address_space, +- PAM_BIOS_BASE, PAM_BIOS_SIZE); +- for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { +- init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, +- mch->system_memory, mch->pci_address_space, +- PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); +- } + } + + uint64_t mch_mcfg_base(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch 
b/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch deleted file mode 100644 index 1ec1c82..0000000 --- a/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 2732b6c5ef249d3ec9affca66768cc2fc476ff7c Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Thu, 6 Jul 2023 01:55:47 -0300 -Subject: [PATCH 11/12] pcie: Add hotplug detect state register to cmask -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 188: pcie: Add hotplug detect state register to cmask -RH-Bugzilla: 2215819 -RH-Acked-by: Peter Xu -RH-Acked-by: quintela1 -RH-Commit: [1/1] a125fa337711bddbc957c399044393e82272b143 (LeoBras/centos-qemu-kvm) - -When trying to migrate a machine type pc-q35-6.0 or lower, with this -cmdline options, - --device driver=pcie-root-port,port=18,chassis=19,id=pcie-root-port18,bus=pcie.0,addr=0x12 \ --device driver=nec-usb-xhci,p2=4,p3=4,id=nex-usb-xhci0,bus=pcie-root-port18,addr=0x12.0x1 - -the following bug happens after all ram pages were sent: - -qemu-kvm: get_pci_config_device: Bad config data: i=0x6e read: 0 device: 40 cmask: ff wmask: 0 w1cmask:19 -qemu-kvm: Failed to load PCIDevice:config -qemu-kvm: Failed to load pcie-root-port:parent_obj.parent_obj.parent_obj -qemu-kvm: error while loading state for instance 0x0 of device '0000:00:12.0/pcie-root-port' -qemu-kvm: load of migration failed: Invalid argument - -This happens on pc-q35-6.0 or lower because of: -{ "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" } - -In this scenario, hotplug_handler_plug() calls pcie_cap_slot_plug_cb(), -which sets dev->config byte 0x6e with bit PCI_EXP_SLTSTA_PDS to signal PCI -hotplug for the guest. After a while the guest will deal with this hotplug -and qemu will clear the above bit. 
- -Then, during migration, get_pci_config_device() will compare the -configs of both the freshly created device and the one that is being -received via migration, which will differ due to the PCI_EXP_SLTSTA_PDS bit -and cause the bug to reproduce. - -To avoid this fake incompatibility, there are tree fields in PCIDevice that -can help: - -- wmask: Used to implement R/W bytes, and -- w1cmask: Used to implement RW1C(Write 1 to Clear) bytes -- cmask: Used to enable config checks on load. - -According to PCI Express® Base Specification Revision 5.0 Version 1.0, -table 7-27 (Slot Status Register) bit 6, the "Presence Detect State" is -listed as RO (read-only), so it only makes sense to make use of the cmask -field. - -So, clear PCI_EXP_SLTSTA_PDS bit on cmask, so the fake incompatibility on -get_pci_config_device() does not abort the migration. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215819 -Signed-off-by: Leonardo Bras -Message-Id: <20230706045546.593605-3-leobras@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Juan Quintela -(cherry picked from commit 625b370c45f4acd155ee625d61c0057d770a5b5e) -Signed-off-by: Leonardo Bras ---- - hw/pci/pcie.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index b8c24cf45f..8bc4a4ee57 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -659,6 +659,10 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) - pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA, - PCI_EXP_HP_EV_SUPPORTED); - -+ /* Avoid migration abortion when this device hot-removed by guest */ -+ pci_word_test_and_clear_mask(dev->cmask + pos + PCI_EXP_SLTSTA, -+ PCI_EXP_SLTSTA_PDS); -+ - dev->exp.hpev_notified = false; - - qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))), --- -2.39.3 - diff --git a/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch b/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch new file mode 100644 index 0000000..66e0423 --- /dev/null +++ b/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch @@ -0,0 +1,83 @@ +From c70f6e7e3461e6562c0591079cc71068bf0f2ed8 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:07 -0500 +Subject: [PATCH 033/100] physmem: Introduce + ram_block_discard_guest_memfd_range() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [33/91] b6169fa8d752d83977b18897be24f6ab9f3d3472 (bonzini/rhel-qemu-kvm) + +When memory page is converted from private to shared, the original +private memory is back'ed by guest_memfd. Introduce +ram_block_discard_guest_memfd_range() for discarding memory in +guest_memfd. + +Based on a patch by Isaku Yamahata . 
+ +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-12-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b2e9426c04fdd32d93a3a37db6b0c2e67c88c335) +Signed-off-by: Paolo Bonzini +--- + include/exec/cpu-common.h | 2 ++ + system/physmem.c | 23 +++++++++++++++++++++++ + 2 files changed, 25 insertions(+) + +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 6346df17ce..6d5318895a 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -159,6 +159,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); + + int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); + int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); ++int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, ++ size_t length); + + #endif + +diff --git a/system/physmem.c b/system/physmem.c +index 5ebcf5be11..c3d04ca921 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -3721,6 +3721,29 @@ err: + return ret; + } + ++int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, ++ size_t length) ++{ ++ int ret = -1; ++ ++#ifdef CONFIG_FALLOCATE_PUNCH_HOLE ++ ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, ++ start, length); ++ ++ if (ret) { ++ ret = -errno; ++ error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", ++ __func__, rb->idstr, start, length, ret); ++ } ++#else ++ ret = -ENOSYS; ++ error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", ++ __func__, rb->idstr, start, length, ret); ++#endif ++ ++ return ret; ++} ++ + bool ramblock_is_pmem(RAMBlock *rb) + { + return rb->flags & RAM_PMEM; +-- +2.39.3 + diff --git a/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch b/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch deleted file mode 100644 index 0421e33..0000000 --- 
a/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ab9b8620c62540f3267d005c198920671ef9abc3 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 3 Mar 2023 11:15:28 +0100 -Subject: [PATCH 06/56] postcopy-ram: do not use qatomic_mb_read -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [5/50] 534c0e13362dfc994fa90c79bfb5ed6ee8c27dfc (peterx/qemu-kvm) - -It does not even pair with a qatomic_mb_set(), so it is clearer to use -load-acquire in this case; they are synonyms. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4592eaf38755a28300d113cd128f65b5b38495f2) -Signed-off-by: Peter Xu ---- - migration/postcopy-ram.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index bbb8af61ae..d7b48dd920 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -1526,7 +1526,7 @@ static PostcopyState incoming_postcopy_state; - - PostcopyState postcopy_state_get(void) - { -- return qatomic_mb_read(&incoming_postcopy_state); -+ return qatomic_load_acquire(&incoming_postcopy_state); - } - - /* Set the state and return the old state */ --- -2.39.1 - diff --git a/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch b/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch new file mode 100644 index 0000000..037442c --- /dev/null +++ b/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch @@ -0,0 +1,140 @@ +From cfb109b393e019398a52f66a5ff0e9581c841335 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:37 -0500 +Subject: [PATCH 013/100] ppc/pef: switch to use + 
confidential_guest_kvm_init/reset() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [13/91] e25c498fc79a4c8e22ca41d9cbd06e40b4cf1f11 (bonzini/rhel-qemu-kvm) + +Use the unified interface to call confidential guest related kvm_init() +and kvm_reset(), to avoid exposing pef specific functions. + +As a bonus, pef.h goes away since there is no direct call from sPAPR +board code to PEF code anymore. + +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit 00a238b1a845fd5f0acd771664c5e184a63ed9b6) +Signed-off-by: Paolo Bonzini +--- + hw/ppc/pef.c | 9 ++++++--- + hw/ppc/spapr.c | 10 +++++++--- + include/hw/ppc/pef.h | 17 ----------------- + 3 files changed, 13 insertions(+), 23 deletions(-) + delete mode 100644 include/hw/ppc/pef.h + +diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c +index d28ed3ba73..47553348b1 100644 +--- a/hw/ppc/pef.c ++++ b/hw/ppc/pef.c +@@ -15,7 +15,6 @@ + #include "sysemu/kvm.h" + #include "migration/blocker.h" + #include "exec/confidential-guest-support.h" +-#include "hw/ppc/pef.h" + + #define TYPE_PEF_GUEST "pef-guest" + OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) +@@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp) + #endif + } + +-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; +@@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return kvmppc_svm_init(cgs, errp); + } + +-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) ++static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; +@@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, + + static void pef_guest_class_init(ObjectClass 
*oc, void *data) + { ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = pef_kvm_init; ++ klass->kvm_reset = pef_kvm_reset; + } + + static void pef_guest_init(Object *obj) +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index a258d81846..6f6f0fd790 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -75,6 +75,7 @@ + #include "hw/virtio/vhost-scsi-common.h" + + #include "exec/ram_addr.h" ++#include "exec/confidential-guest-support.h" + #include "hw/usb.h" + #include "qemu/config-file.h" + #include "qemu/error-report.h" +@@ -87,7 +88,6 @@ + #include "hw/ppc/spapr_tpm_proxy.h" + #include "hw/ppc/spapr_nvdimm.h" + #include "hw/ppc/spapr_numa.h" +-#include "hw/ppc/pef.h" + + #include "monitor/monitor.h" + +@@ -1715,7 +1715,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) + qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); + } + +- pef_kvm_reset(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_reset(machine->cgs, &error_fatal); ++ } + spapr_caps_apply(spapr); + spapr_nested_reset(spapr); + if (spapr->svm_allowed) { +@@ -2844,7 +2846,9 @@ static void spapr_machine_init(MachineState *machine) + /* + * if Secure VM (PEF) support is configured, then initialize it + */ +- pef_kvm_init(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_init(machine->cgs, &error_fatal); ++ } + + msi_nonbroken = true; + +diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h +deleted file mode 100644 +index 707dbe524c..0000000000 +--- a/include/hw/ppc/pef.h ++++ /dev/null +@@ -1,17 +0,0 @@ +-/* +- * PEF (Protected Execution Facility) for POWER support +- * +- * Copyright Red Hat. +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- * +- */ +- +-#ifndef HW_PPC_PEF_H +-#define HW_PPC_PEF_H +- +-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); +- +-#endif /* HW_PPC_PEF_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch b/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch new file mode 100644 index 0000000..c7f121b --- /dev/null +++ b/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch @@ -0,0 +1,164 @@ +From 83bb32c25472b500738a54ac8f2ad0f5c496acf1 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Wed, 20 Mar 2024 03:39:14 -0500 +Subject: [PATCH 009/100] q35: Introduce smm_ranges property for q35-pci-host + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [9/91] 931156772bfc2085e7241eecc56cf6eca3dac1fd (bonzini/rhel-qemu-kvm) + +Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG, +etc... exist for the target platform. TDX doesn't support SMM and doesn't +play nice with QEMU modifying related guest memory ranges. 
+ +Signed-off-by: Isaku Yamahata +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-19-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b07bf7b73fd02d24a7baa64a580f4974b86bbc86) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_q35.c | 2 ++ + hw/pci-host/q35.c | 42 +++++++++++++++++++++++++++------------ + include/hw/i386/pc.h | 1 + + include/hw/pci-host/q35.h | 1 + + 4 files changed, 33 insertions(+), 13 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 9adcdadce8..dedc86eec9 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -219,6 +219,8 @@ static void pc_q35_init(MachineState *machine) + x86ms->above_4g_mem_size, NULL); + object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, + pcms->default_bus_bypass_iommu, NULL); ++ object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES, ++ x86_machine_is_smm_enabled(x86ms), NULL); + sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); + + /* pci */ +diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c +index 98d4a7c253..0b6cbaed7e 100644 +--- a/hw/pci-host/q35.c ++++ b/hw/pci-host/q35.c +@@ -179,6 +179,8 @@ static Property q35_host_props[] = { + mch.below_4g_mem_size, 0), + DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, + mch.above_4g_mem_size, 0), ++ DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost, ++ mch.has_smm_ranges, true), + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), + DEFINE_PROP_END_OF_LIST(), + }; +@@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj) + /* mch's object_initialize resets the default value, set it again */ + qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE, + Q35_PCI_HOST_HOLE64_SIZE_DEFAULT); ++ + object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32", + q35_host_get_pci_hole_start, + NULL, NULL, NULL); +@@ -476,6 +479,10 @@ static void 
mch_write_config(PCIDevice *d, + mch_update_pciexbar(mch); + } + ++ if (!mch->has_smm_ranges) { ++ return; ++ } ++ + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM, + MCH_HOST_BRIDGE_SMRAM_SIZE)) { + mch_update_smram(mch); +@@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d, + static void mch_update(MCHPCIState *mch) + { + mch_update_pciexbar(mch); ++ + mch_update_pam(mch); +- mch_update_smram(mch); +- mch_update_ext_tseg_mbytes(mch); +- mch_update_smbase_smram(mch); ++ if (mch->has_smm_ranges) { ++ mch_update_smram(mch); ++ mch_update_ext_tseg_mbytes(mch); ++ mch_update_smbase_smram(mch); ++ } + + /* + * pci hole goes from end-of-low-ram to io-apic. +@@ -538,18 +548,20 @@ static void mch_reset(DeviceState *qdev) + pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR, + MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT); + +- d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; +- d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; +- d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; +- d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; ++ if (mch->has_smm_ranges) { ++ d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; ++ d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; ++ d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; ++ d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; + +- if (mch->ext_tseg_mbytes > 0) { +- pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, +- MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); +- } ++ if (mch->ext_tseg_mbytes > 0) { ++ pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, ++ MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); ++ } + +- d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; +- d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; ++ d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; ++ d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; ++ } + + mch_update(mch); + } +@@ -578,6 +590,10 @@ static void mch_realize(PCIDevice *d, Error **errp) + 
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); + } + ++ if (!mch->has_smm_ranges) { ++ return; ++ } ++ + /* if *disabled* show SMRAM to all CPUs */ + memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", + mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 87420783ab..467e7fb52f 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -164,6 +164,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" + #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" + #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" ++#define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + + + void pc_pci_as_mapping_init(MemoryRegion *system_memory, +diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h +index bafcbe6752..22fadfa3ed 100644 +--- a/include/hw/pci-host/q35.h ++++ b/include/hw/pci-host/q35.h +@@ -50,6 +50,7 @@ struct MCHPCIState { + MemoryRegion tseg_blackhole, tseg_window; + MemoryRegion smbase_blackhole, smbase_window; + bool has_smram_at_smbase; ++ bool has_smm_ranges; + Range pci_hole; + uint64_t below_4g_mem_size; + uint64_t above_4g_mem_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch b/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch deleted file mode 100644 index abaadf8..0000000 --- a/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 99f27e14856c528f442b628e8f4a7881e6e63179 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 30 May 2023 09:19:41 +0200 -Subject: [PATCH 4/5] qapi: add '@fdset' feature for - BlockdevOptionsVirtioBlkVhostVdpa - -RH-Author: Stefano Garzarella -RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver -RH-Bugzilla: 2180076 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi 
-RH-Commit: [2/2] abee2a542e41f9eaa17dd204b74778e232d1eb60 (sgarzarella/qemu-kvm-c-9-s) - -The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports the fd -passing through the new 'fd' property. - -Since now we are using qemu_open() on '@path' if the virtio-blk driver -supports the fd passing, let's announce it. -In this way, the management layer can pass the file descriptor of an -already opened vhost-vdpa character device. This is useful especially -when the device can only be accessed with certain privileges. - -Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver -in libblkio supports it. - -Suggested-by: Markus Armbruster -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230530071941.8954-3-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 98b126f5e3228a346c774e569e26689943b401dd) -- changed doc indentantion since QAPI parser failed downstream because - we don't have commit 08349786c84306863a3b659c8a9b28bb74c405c6 - downstream. It relaxed the indentation rules. 
-Signed-off-by: Stefano Garzarella ---- - meson.build | 4 ++++ - qapi/block-core.json | 6 ++++++ - 2 files changed, 10 insertions(+) - -diff --git a/meson.build b/meson.build -index d964e741e7..a18cc64531 100644 ---- a/meson.build -+++ b/meson.build -@@ -1843,6 +1843,10 @@ config_host_data.set('CONFIG_LZO', lzo.found()) - config_host_data.set('CONFIG_MPATH', mpathpersist.found()) - config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) - config_host_data.set('CONFIG_BLKIO', blkio.found()) -+if blkio.found() -+ config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', -+ blkio.version().version_compare('>=1.3.0')) -+endif - config_host_data.set('CONFIG_CURL', curl.found()) - config_host_data.set('CONFIG_CURSES', curses.found()) - config_host_data.set('CONFIG_GBM', gbm.found()) -diff --git a/qapi/block-core.json b/qapi/block-core.json -index c05ad0c07e..81b48a8d3b 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -3841,10 +3841,16 @@ - # - # @path: path to the vhost-vdpa character device. 
- # -+# Features: -+# @fdset: Member @path supports the special "/dev/fdset/N" path -+# (since 8.1) -+# - # Since: 7.2 - ## - { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', - 'data': { 'path': 'str' }, -+ 'features': [ { 'name' :'fdset', -+ 'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ], - 'if': 'CONFIG_BLKIO' } - - ## --- -2.39.3 - diff --git a/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch b/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch deleted file mode 100644 index a95895b..0000000 --- a/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch +++ /dev/null @@ -1,50 +0,0 @@ -From cbf9c74ef46d71c015b9de53f4514941dca8a035 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:19:37 -0400 -Subject: [PATCH 10/14] qapi, i386/sev: Change the reduced-phys-bits value from - 5 to 1 - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/4] 4243578db33f89461e60b745eb96fee402218c9f (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 798a818f50a9bfc01e8b5943090de458863b897b -Author: Tom Lendacky -Date: Fri Sep 30 10:14:27 2022 -0500 - - qapi, i386/sev: Change the reduced-phys-bits value from 5 to 1 - - A guest only ever experiences, at most, 1 bit of reduced physical - addressing. Change the query-sev-capabilities json comment to use 1. - - Fixes: 31dd67f684 ("sev/i386: qmp: add query-sev-capabilities command") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. 
David Alan Gilbert - Message-Id: - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - qapi/misc-target.json | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/qapi/misc-target.json b/qapi/misc-target.json -index de91054523..bf04042f45 100644 ---- a/qapi/misc-target.json -+++ b/qapi/misc-target.json -@@ -172,7 +172,7 @@ - # -> { "execute": "query-sev-capabilities" } - # <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE", - # "cpu0-id": "2lvmGwo+...61iEinw==", --# "cbitpos": 47, "reduced-phys-bits": 5}} -+# "cbitpos": 47, "reduced-phys-bits": 1}} - # - ## - { 'command': 'query-sev-capabilities', 'returns': 'SevCapability', --- -2.39.3 - diff --git a/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch b/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch new file mode 100644 index 0000000..00cdae0 --- /dev/null +++ b/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch @@ -0,0 +1,117 @@ +From 97739ee2ed20856fe395248d399dfc7f9ab4f33d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 11 Apr 2024 15:06:01 +0200 +Subject: [PATCH 1/4] qcow2: Don't open data_file with BDRV_O_NO_IO + +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake +RH-Commit: [1/4] f9843ce5c519901654a7d8ba43ee95ce25ca13c2 + +One use case for 'qemu-img info' is verifying that untrusted images +don't reference an unwanted external file, be it as a backing file or an +external data file. To make sure that calling 'qemu-img info' can't +already have undesired side effects with a malicious image, just don't +open the data file at all with BDRV_O_NO_IO. If nothing ever tries to do +I/O, we don't need to have it open. + +This changes the output of iotests case 061, which used 'qemu-img info' +to show that opening an image with an invalid data file fails. After +this patch, it succeeds. 
Replace this part of the test with a qemu-io +call, but keep the final 'qemu-img info' to show that the invalid data +file is correctly displayed in the output. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek +--- + block/qcow2.c | 17 ++++++++++++++++- + tests/qemu-iotests/061 | 6 ++++-- + tests/qemu-iotests/061.out | 8 ++++++-- + 3 files changed, 26 insertions(+), 5 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 0ebd455dc8..a4cffb628c 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1642,7 +1642,22 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + goto fail; + } + +- if (open_data_file) { ++ if (open_data_file && (flags & BDRV_O_NO_IO)) { ++ /* ++ * Don't open the data file for 'qemu-img info' so that it can be used ++ * to verify that an untrusted qcow2 image doesn't refer to external ++ * files. ++ * ++ * Note: This still makes has_data_file() return true. 
++ */ ++ if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { ++ s->data_file = NULL; ++ } else { ++ s->data_file = bs->file; ++ } ++ qdict_extract_subqdict(options, NULL, "data-file."); ++ qdict_del(options, "data-file"); ++ } else if (open_data_file) { + /* Open external data file */ + bdrv_graph_co_rdunlock(); + s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs, +diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 +index 53c7d428e3..b71ac097d1 100755 +--- a/tests/qemu-iotests/061 ++++ b/tests/qemu-iotests/061 +@@ -326,12 +326,14 @@ $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG" + echo + _make_test_img -o "compat=1.1,data_file=$TEST_IMG.data" 64M + $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG" +-_img_info --format-specific ++$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt ++$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io + TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts + + echo + $QEMU_IMG amend -o "data_file=" --image-opts "data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" +-_img_info --format-specific ++$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt ++$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io + TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts + + echo +diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out +index 139fc68177..24c33add7c 100644 +--- a/tests/qemu-iotests/061.out ++++ b/tests/qemu-iotests/061.out +@@ -545,7 +545,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + qemu-img: data-file can only be set for images that use an external data file + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data +-qemu-img: Could not open 
'TEST_DIR/t.IMGFMT': Could not open 'foo': No such file or directory ++qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open 'foo': No such file or directory ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +@@ -560,7 +562,9 @@ Format specific information: + corrupt: false + extended l2: false + +-qemu-img: Could not open 'TEST_DIR/t.IMGFMT': 'data-file' is required for this image ++qemu-io: can't open device TEST_DIR/t.IMGFMT: 'data-file' is required for this image ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +-- +2.39.3 + diff --git a/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch b/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch new file mode 100644 index 0000000..967adc1 --- /dev/null +++ b/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch @@ -0,0 +1,40 @@ +From 97e10c2ccc8dc29019d6d22de1d23c55fea0f6c4 Mon Sep 17 00:00:00 2001 +From: Dehan Meng +Date: Wed, 21 Aug 2024 14:55:01 +0800 +Subject: [PATCH] qemu-guest-agent: Update the logfile path of + qga-fsfreeze-hook.log + +RH-Author: 6-dehan +RH-MergeRequest: 265: qemu-guest-agent: Update the logfile path of qga-fsfreeze-hook.log +RH-Jira: RHEL-52250 +RH-Acked-by: Konstantin Kostiuk +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 7c5cfb882dbc277becb7daa2c5d6b8eff3d601b2 (6-dehan/src_centosupstream_qemu-kvm) + +selinux context 'system_u:object_r:virt_qemu_ga_log_t:s0', it +should be changed to '/var/log/qemu-ga/qga-fsfreeze-hook.log'. And +it's worth to mention that this is RHEL-only change for matching +existing SELinux boolean and policy. 
+ +Jira: https://issues.redhat.com/browse/RHEL-52250 +Signed-off-by: Dehan Meng +--- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index e9b84ec028..70536ba3e3 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -7,7 +7,7 @@ + # "freeze" argument before the filesystem is frozen. And for fsfreeze-thaw + # request, it is issued with "thaw" argument after filesystem is thawed. + +-LOGFILE=/var/log/qga-fsfreeze-hook.log ++LOGFILE=/var/log/qemu-ga/qga-fsfreeze-hook.log + FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored +-- +2.39.3 + diff --git a/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch b/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch deleted file mode 100644 index 6830692..0000000 --- a/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 50c833fc3c7d8d3a5124cfdb2f2dc06b910c2252 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:21:25 -0400 -Subject: [PATCH 11/14] qemu-options.hx: Update the reduced-phys-bits - documentation - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/4] b0c4a19e9f4185c97ddf71857bc9367cea01ffa8 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 326e3015c4c6f3197157ea0bb00826ae740e2fad -Author: Tom Lendacky -Date: Fri Sep 30 10:14:28 2022 -0500 - - qemu-options.hx: Update the reduced-phys-bits documentation - - A guest only ever experiences, at most, 1 bit of reduced physical - addressing. 
Update the documentation to reflect this as well as change - the example value on the reduced-phys-bits option. - - Fixes: a9b4942f48 ("target/i386: add Secure Encrypted Virtualization (SEV) object") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. David Alan Gilbert - Message-Id: <13a62ced1808546c1d398e2025cf85f4c94ae123.1664550870.git.thomas.lendacky@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - qemu-options.hx | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/qemu-options.hx b/qemu-options.hx -index b18f933703..edf10a5aac 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -5417,7 +5417,7 @@ SRST - physical address space. The ``reduced-phys-bits`` is used to - provide the number of bits we loose in physical address space. - Similar to C-bit, the value is Host family dependent. On EPYC, -- the value should be 5. -+ a guest will lose a maximum of 1 bit, so the value should be 1. - - The ``sev-device`` provides the device file to use for - communicating with the SEV firmware running inside AMD Secure -@@ -5452,7 +5452,7 @@ SRST - - # |qemu_system_x86| \\ - ...... \\ -- -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=5 \\ -+ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \\ - -machine ...,memory-encryption=sev0 \\ - ..... 
- --- -2.39.3 - diff --git a/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch b/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch new file mode 100644 index 0000000..c60570e --- /dev/null +++ b/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch @@ -0,0 +1,129 @@ +From fef17d3466deba9b4d581604de1c64c210d1a454 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 17 May 2024 21:50:14 -0500 +Subject: [PATCH 1/4] qio: Inherit follow_coroutine_ctx across TLS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 244: qio: Inherit follow_coroutine_ctx across TLS +RH-Jira: RHEL-33440 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] 447b144638a1f032fa036838f5f1839628389fe5 (ebblake/centos-qemu-kvm) + +Since qemu 8.2, the combination of NBD + TLS + iothread crashes on an +assertion failure: + +qemu-kvm: ../io/channel.c:534: void qio_channel_restart_read(void *): Assertion `qemu_get_current_aio_context() == qemu_coroutine_get_aio_context(co)' failed. + +It turns out that when we removed AioContext locking, we did so by +having NBD tell its qio channels that it wanted to opt in to +qio_channel_set_follow_coroutine_ctx(); but while we opted in on the +main channel, we did not opt in on the TLS wrapper channel. +qemu-iotests has coverage of NBD+iothread and NBD+TLS, but apparently +no coverage of NBD+TLS+iothread, or we would have noticed this +regression sooner. (I'll add that in the next patch) + +But while we could manually opt in to the TLS channel in nbd/server.c +(a one-line change), it is more generic if all qio channels that wrap +other channels inherit the follow status, in the same way that they +inherit feature bits. + +CC: Stefan Hajnoczi +CC: Daniel P. 
Berrangé +CC: qemu-stable@nongnu.org +Fixes: https://issues.redhat.com/browse/RHEL-34786 +Fixes: 06e0f098 ("io: follow coroutine AioContext in qio_channel_yield()", v8.2.0) +Signed-off-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Message-ID: <20240518025246.791593-5-eblake@redhat.com> +(cherry picked from commit 199e84de1c903ba5aa1f7256310bbc4a20dd930b) +Jira: https://issues.redhat.com/browse/RHEL-33440 +Signed-off-by: Eric Blake +--- + io/channel-tls.c | 26 +++++++++++++++----------- + io/channel-websock.c | 1 + + 2 files changed, 16 insertions(+), 11 deletions(-) + +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 1d9c9c72bf..67b9700006 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -69,37 +69,40 @@ qio_channel_tls_new_server(QIOChannel *master, + const char *aclname, + Error **errp) + { +- QIOChannelTLS *ioc; ++ QIOChannelTLS *tioc; ++ QIOChannel *ioc; + +- ioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ++ tioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ++ ioc = QIO_CHANNEL(tioc); + +- ioc->master = master; ++ tioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { +- qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SHUTDOWN); ++ qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } + object_ref(OBJECT(master)); + +- ioc->session = qcrypto_tls_session_new( ++ tioc->session = qcrypto_tls_session_new( + creds, + NULL, + aclname, + QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, + errp); +- if (!ioc->session) { ++ if (!tioc->session) { + goto error; + } + + qcrypto_tls_session_set_callbacks( +- ioc->session, ++ tioc->session, + qio_channel_tls_write_handler, + qio_channel_tls_read_handler, +- ioc); ++ tioc); + +- trace_qio_channel_tls_new_server(ioc, master, creds, aclname); +- return ioc; ++ trace_qio_channel_tls_new_server(tioc, master, creds, aclname); ++ return tioc; + + error: +- 
object_unref(OBJECT(ioc)); ++ object_unref(OBJECT(tioc)); + return NULL; + } + +@@ -116,6 +119,7 @@ qio_channel_tls_new_client(QIOChannel *master, + ioc = QIO_CHANNEL(tioc); + + tioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } +diff --git a/io/channel-websock.c b/io/channel-websock.c +index a12acc27cf..de39f0d182 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -883,6 +883,7 @@ qio_channel_websock_new_server(QIOChannel *master) + ioc = QIO_CHANNEL(wioc); + + wioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch b/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch deleted file mode 100644 index 4a4a2cc..0000000 --- a/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 936e21428a04524ccffeb36110d1aa61de9f44e5 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 11/21] raven: disable reentrancy detection for iomem - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/13] 48278583aa1ab08b912f49cd8b3a79d1bb3abf5f (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:12 2023 -0400 - - raven: disable reentrancy detection for iomem - - As the code is designed for re-entrant calls from raven_io_ops to - pci-conf, mark raven_io_ops as reentrancy-safe. 
- - Signed-off-by: Alexander Bulekov - Message-Id: <20230427211013.2994127-8-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/pci-host/raven.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c -index 072ffe3c5e..9a11ac4b2b 100644 ---- a/hw/pci-host/raven.c -+++ b/hw/pci-host/raven.c -@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) - memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); - address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); - -+ /* -+ * Raven's raven_io_ops use the address-space API to access pci-conf-idx -+ * (which is also owned by the raven device). As such, mark the -+ * pci_io_non_contiguous as re-entrancy safe. -+ */ -+ s->pci_io_non_contiguous.disable_reentrancy_guard = true; -+ - /* CPU address space */ - memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, - &s->pci_io); --- -2.39.3 - diff --git a/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch b/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch new file mode 100644 index 0000000..c6948ae --- /dev/null +++ b/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch @@ -0,0 +1,36 @@ +From 97334815c4a7cf0911b30c1366bbe67e883c57dd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 5 Jun 2024 10:28:20 +0400 +Subject: [PATCH 4/4] rhel 9.4.0 machine type compat for virtio-gpu migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 246: virtio-gpu: fix v2 migration +RH-Jira: RHEL-34621 +RH-Acked-by: Peter Xu +RH-Acked-by: Thomas Huth +RH-Commit: [2/2] 2a895d510453e83bb45dfb39b7751bcc2f309cc5 (marcandre.lureau-rh/qemu-kvm-centos) + +Signed-off-by: Marc-André Lureau +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c 
+index cf1d7faaaf..92609aae27 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -310,6 +310,8 @@ GlobalProperty hw_compat_rhel_9_5[] = { + { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, + /* hw_compat_rhel_9_5 from hw_compat_8_2 */ + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, + }; + const size_t hw_compat_rhel_9_5_len = G_N_ELEMENTS(hw_compat_rhel_9_5); + +-- +2.39.3 + diff --git a/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch b/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch new file mode 100644 index 0000000..4f757bf --- /dev/null +++ b/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch @@ -0,0 +1,67 @@ +From 4c93bec108f7e3918a2ef91b51cec477ade38cc3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 17:45:56 -0400 +Subject: [PATCH 018/100] runstate: skip initial CPU reset if reset is not + actually possible +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [18/91] ced267fdaadbf2072c1897223522457a006e6c81 (bonzini/rhel-qemu-kvm) + +Right now, the system reset is concluded by a call to +cpu_synchronize_all_post_reset() in order to sync any changes +that the machine reset callback applied to the CPU state. + +However, for VMs with encrypted state such as SEV-ES guests (currently +the only case of guests with non-resettable CPUs) this cannot be done, +because guest state has already been finalized by machine-init-done notifiers. +cpu_synchronize_all_post_reset() does nothing on these guests, and actually +we would like to make it fail if called once guest has been encrypted. 
+So, assume that boards that support non-resettable CPUs do not touch +CPU state and that all such setup is done before, at the time of +cpu_synchronize_all_post_init(). + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 08b2d15cdd0d3fbbe37ce23bf192b770db3a7539) +Signed-off-by: Paolo Bonzini +--- + system/runstate.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/system/runstate.c b/system/runstate.c +index d6ab860eca..cb4905a40f 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -501,7 +501,20 @@ void qemu_system_reset(ShutdownCause reason) + default: + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } +- cpu_synchronize_all_post_reset(); ++ ++ /* ++ * Some boards use the machine reset callback to point CPUs to the firmware ++ * entry point. Assume that this is not the case for boards that support ++ * non-resettable CPUs (currently used only for confidential guests), in ++ * which case cpu_synchronize_all_post_init() is enough because ++ * it does _more_ than cpu_synchronize_all_post_reset(). 
++ */ ++ if (cpus_are_resettable()) { ++ cpu_synchronize_all_post_reset(); ++ } else { ++ assert(runstate_check(RUN_STATE_PRELAUNCH)); ++ } ++ + vm_set_suspended(false); + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch b/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch new file mode 100644 index 0000000..bc6e4e3 --- /dev/null +++ b/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch @@ -0,0 +1,109 @@ +From 4ebc58d4a7a3d4a20f20f1cd3f21082b80097fe2 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:38 -0500 +Subject: [PATCH 014/100] s390: Switch to use confidential_guest_kvm_init() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [14/91] 8c9e09ec9976c00b41c02868fff034286a341468 (bonzini/rhel-qemu-kvm) + +Use unified confidential_guest_kvm_init() for consistency with +other architectures. 
+ +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a14a2b0148e657cc526b7a75f2a1937628764e7a) +Signed-off-by: Paolo Bonzini +--- + hw/s390x/s390-virtio-ccw.c | 5 ++++- + target/s390x/kvm/pv.c | 10 +++++++++- + target/s390x/kvm/pv.h | 14 -------------- + 3 files changed, 13 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9ad54682c6..828ce6e87e 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "exec/ram_addr.h" ++#include "exec/confidential-guest-support.h" + #include "hw/s390x/s390-virtio-hcall.h" + #include "hw/s390x/sclp.h" + #include "hw/s390x/s390_flic.h" +@@ -260,7 +261,9 @@ static void ccw_init(MachineState *machine) + s390_init_cpus(machine); + + /* Need CPU model to be determined before we can set up PV */ +- s390_pv_init(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_init(machine->cgs, &error_fatal); ++ } + + s390_flic_init(); + +diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c +index 7ca7faec73..dde836d21a 100644 +--- a/target/s390x/kvm/pv.c ++++ b/target/s390x/kvm/pv.c +@@ -334,12 +334,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) + return s390_pv_check_cpus(errp); + } + +-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { + return 0; + } + ++ if (!kvm_enabled()) { ++ error_setg(errp, "Protected Virtualization requires KVM"); ++ return -1; ++ } ++ + if (!s390_has_feat(S390_FEAT_UNPACK)) { + error_setg(errp, + "CPU model does not support Protected Virtualization"); +@@ -364,6 +369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, + + static void 
s390_pv_guest_class_init(ObjectClass *oc, void *data) + { ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = s390_pv_kvm_init; + } + + static void s390_pv_guest_init(Object *obj) +diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h +index 5877d28ff1..4b40817439 100644 +--- a/target/s390x/kvm/pv.h ++++ b/target/s390x/kvm/pv.h +@@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, + static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } + #endif /* CONFIG_KVM */ + +-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) +-{ +- if (!cgs) { +- return 0; +- } +- if (kvm_enabled()) { +- return s390_pv_kvm_init(cgs, errp); +- } +- +- error_setg(errp, "Protected Virtualization requires KVM"); +- return -1; +-} +- + #endif /* HW_S390_PV_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch b/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch deleted file mode 100644 index d0eb303..0000000 --- a/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch +++ /dev/null @@ -1,220 +0,0 @@ -From 41987ce0dd79d8734088002cbd34f20704dd017a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 17 Jul 2023 17:36:07 +0200 -Subject: [PATCH 04/12] s390x/ap: Wire up the device request notifier interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 185: Backport s390x fixes from QEMU 8.1 -RH-Jira: RHEL-794 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Commit: [1/1] ab6c912a1b8cdb584adacac16af79352fdfe7355 (clegoate/qemu-kvm-c9s) - -Jira: https://issues.redhat.com/browse/RHEL-794 - -commit 1360b2ad1f673d32a09de5826cd71ecd0510164a -Author: Tony Krowiak -Date: Fri Jun 2 10:11:25 
2023 -0400 - - s390x/ap: Wire up the device request notifier interface - - Let's wire up the device request notifier interface to handle device unplug - requests for AP. - - Signed-off-by: Tony Krowiak - Link: https://lore.kernel.org/qemu-devel/20230530225544.280031-1-akrowiak@linux.ibm.com/ - Signed-off-by: Cédric Le Goater - -Backport note: - - - linux-headers/linux/vfio.h - updated to v6.5-rc1 level for VFIO_AP_REQ_IRQ_INDEX definition - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/ap.c | 113 +++++++++++++++++++++++++++++++++++++ - linux-headers/linux/vfio.h | 9 +++ - 2 files changed, 122 insertions(+) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index e0dd561e85..6e21d1da5a 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -18,6 +18,8 @@ - #include "hw/vfio/vfio-common.h" - #include "hw/s390x/ap-device.h" - #include "qemu/error-report.h" -+#include "qemu/event_notifier.h" -+#include "qemu/main-loop.h" - #include "qemu/module.h" - #include "qemu/option.h" - #include "qemu/config-file.h" -@@ -33,6 +35,7 @@ - struct VFIOAPDevice { - APDevice apdev; - VFIODevice vdev; -+ EventNotifier req_notifier; - }; - - OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) -@@ -84,10 +87,110 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) - return vfio_get_group(groupid, &address_space_memory, errp); - } - -+static void vfio_ap_req_notifier_handler(void *opaque) -+{ -+ VFIOAPDevice *vapdev = opaque; -+ Error *err = NULL; -+ -+ if (!event_notifier_test_and_clear(&vapdev->req_notifier)) { -+ return; -+ } -+ -+ qdev_unplug(DEVICE(vapdev), &err); -+ -+ if (err) { -+ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); -+ } -+} -+ -+static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, -+ unsigned int irq, Error **errp) -+{ -+ int fd; -+ size_t argsz; -+ IOHandler *fd_read; -+ EventNotifier *notifier; -+ struct vfio_irq_info *irq_info; -+ VFIODevice *vdev = &vapdev->vdev; -+ -+ switch (irq) { -+ case VFIO_AP_REQ_IRQ_INDEX: -+ 
notifier = &vapdev->req_notifier; -+ fd_read = vfio_ap_req_notifier_handler; -+ break; -+ default: -+ error_setg(errp, "vfio: Unsupported device irq(%d)", irq); -+ return; -+ } -+ -+ if (vdev->num_irqs < irq + 1) { -+ error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", -+ irq, vdev->num_irqs); -+ return; -+ } -+ -+ argsz = sizeof(*irq_info); -+ irq_info = g_malloc0(argsz); -+ irq_info->index = irq; -+ irq_info->argsz = argsz; -+ -+ if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, -+ irq_info) < 0 || irq_info->count < 1) { -+ error_setg_errno(errp, errno, "vfio: Error getting irq info"); -+ goto out_free_info; -+ } -+ -+ if (event_notifier_init(notifier, 0)) { -+ error_setg_errno(errp, errno, -+ "vfio: Unable to init event notifier for irq (%d)", -+ irq); -+ goto out_free_info; -+ } -+ -+ fd = event_notifier_get_fd(notifier); -+ qemu_set_fd_handler(fd, fd_read, NULL, vapdev); -+ -+ if (vfio_set_irq_signaling(vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, -+ errp)) { -+ qemu_set_fd_handler(fd, NULL, NULL, vapdev); -+ event_notifier_cleanup(notifier); -+ } -+ -+out_free_info: -+ g_free(irq_info); -+ -+} -+ -+static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, -+ unsigned int irq) -+{ -+ Error *err = NULL; -+ EventNotifier *notifier; -+ -+ switch (irq) { -+ case VFIO_AP_REQ_IRQ_INDEX: -+ notifier = &vapdev->req_notifier; -+ break; -+ default: -+ error_report("vfio: Unsupported device irq(%d)", irq); -+ return; -+ } -+ -+ if (vfio_set_irq_signaling(&vapdev->vdev, irq, 0, -+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { -+ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); -+ } -+ -+ qemu_set_fd_handler(event_notifier_get_fd(notifier), -+ NULL, NULL, vapdev); -+ event_notifier_cleanup(notifier); -+} -+ - static void vfio_ap_realize(DeviceState *dev, Error **errp) - { - int ret; - char *mdevid; -+ Error *err = NULL; - VFIOGroup *vfio_group; - APDevice *apdev = AP_DEVICE(dev); - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); -@@ -116,6 +219,15 
@@ static void vfio_ap_realize(DeviceState *dev, Error **errp) - goto out_get_dev_err; - } - -+ vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, &err); -+ if (err) { -+ /* -+ * Report this error, but do not make it a failing condition. -+ * Lack of this IRQ in the host does not prevent normal operation. -+ */ -+ error_report_err(err); -+ } -+ - return; - - out_get_dev_err: -@@ -129,6 +241,7 @@ static void vfio_ap_unrealize(DeviceState *dev) - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); - VFIOGroup *group = vapdev->vdev.group; - -+ vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); - vfio_ap_put_device(vapdev); - vfio_put_group(group); - } -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index c59692ce0b..ce464957c8 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -642,6 +642,15 @@ enum { - VFIO_CCW_NUM_IRQS - }; - -+/* -+ * The vfio-ap bus driver makes use of the following IRQ index mapping. -+ * Unimplemented IRQ types return a count of zero. 
-+ */ -+enum { -+ VFIO_AP_REQ_IRQ_INDEX, -+ VFIO_AP_NUM_IRQS -+}; -+ - /** - * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, - * struct vfio_pci_hot_reset_info) --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch b/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch deleted file mode 100644 index ecf1353..0000000 --- a/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 3cab2a638a10ece2b76d9f33a3c5dc6f64f1bbaa Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Wed, 10 May 2023 12:55:31 +0200 -Subject: [PATCH 21/21] s390x/pv: Fix spurious warning with asynchronous - teardown -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests -RH-Bugzilla: 2168500 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cédric Le Goater -RH-Commit: [2/2] cb690d3155ea22c6df00a4d75b72f501515e5556 (thuth/qemu-kvm-cs9) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 - -Kernel commit 292a7d6fca33 ("KVM: s390: pv: fix asynchronous teardown -for small VMs") causes the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl to fail -if the VM is not larger than 2GiB. QEMU would attempt it and fail, -print an error message, and then proceed with a normal teardown. - -Avoid attempting to use asynchronous teardown altogether when the VM is -not larger than 2 GiB. This will avoid triggering the error message and -also avoid pointless overhead; normal teardown is fast enough for small -VMs. 
- -Reported-by: Marc Hartmayer -Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot") -Link: https://lore.kernel.org/all/20230421085036.52511-2-imbrenda@linux.ibm.com/ -Signed-off-by: Claudio Imbrenda -Message-Id: <20230510105531.30623-2-imbrenda@linux.ibm.com> -Reviewed-by: Thomas Huth -[thuth: Fix inline function parameter in pv.h] -Signed-off-by: Thomas Huth -(cherry picked from commit 88693ab2a53f2f3d25cb39a7b5034ab391bc5a81) ---- - hw/s390x/pv.c | 10 ++++++++-- - hw/s390x/s390-virtio-ccw.c | 2 +- - include/hw/s390x/pv.h | 6 +++--- - 3 files changed, 12 insertions(+), 6 deletions(-) - -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index 49ea38236c..b63f3784c6 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -13,6 +13,7 @@ - - #include - -+#include "qemu/units.h" - #include "qapi/error.h" - #include "qemu/error-report.h" - #include "sysemu/kvm.h" -@@ -115,7 +116,7 @@ static void *s390_pv_do_unprot_async_fn(void *p) - return NULL; - } - --bool s390_pv_vm_try_disable_async(void) -+bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) - { - /* - * t is only needed to create the thread; once qemu_thread_create -@@ -123,7 +124,12 @@ bool s390_pv_vm_try_disable_async(void) - */ - QemuThread t; - -- if (!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { -+ /* -+ * If the feature is not present or if the VM is not larger than 2 GiB, -+ * KVM_PV_ASYNC_CLEANUP_PREPARE fill fail; no point in attempting it. 
-+ */ -+ if ((MACHINE(ms)->maxram_size <= 2 * GiB) || -+ !kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { - return false; - } - if (s390_pv_cmd(KVM_PV_ASYNC_CLEANUP_PREPARE, NULL) != 0) { -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 6a0b93c63d..d95c595f88 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -330,7 +330,7 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) - - static void s390_machine_unprotect(S390CcwMachineState *ms) - { -- if (!s390_pv_vm_try_disable_async()) { -+ if (!s390_pv_vm_try_disable_async(ms)) { - s390_pv_vm_disable(); - } - ms->pv = false; -diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h -index 966306a9db..7b935e2246 100644 ---- a/include/hw/s390x/pv.h -+++ b/include/hw/s390x/pv.h -@@ -14,10 +14,10 @@ - - #include "qapi/error.h" - #include "sysemu/kvm.h" -+#include "hw/s390x/s390-virtio-ccw.h" - - #ifdef CONFIG_KVM - #include "cpu.h" --#include "hw/s390x/s390-virtio-ccw.h" - - static inline bool s390_is_pv(void) - { -@@ -41,7 +41,7 @@ static inline bool s390_is_pv(void) - int s390_pv_query_info(void); - int s390_pv_vm_enable(void); - void s390_pv_vm_disable(void); --bool s390_pv_vm_try_disable_async(void); -+bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); - int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); - int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); - void s390_pv_prep_reset(void); -@@ -61,7 +61,7 @@ static inline bool s390_is_pv(void) { return false; } - static inline int s390_pv_query_info(void) { return 0; } - static inline int s390_pv_vm_enable(void) { return 0; } - static inline void s390_pv_vm_disable(void) {} --static inline bool s390_pv_vm_try_disable_async(void) { return false; } -+static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } - static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } - static 
inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } - static inline void s390_pv_prep_reset(void) {} --- -2.39.3 - diff --git a/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch b/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch new file mode 100644 index 0000000..2c0fb91 --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch @@ -0,0 +1,186 @@ +From 3d197f42afea6d0b176c2b26b772965692ffeab3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Alex=20Benn=C3=A9e?= +Date: Tue, 14 May 2024 18:42:44 +0100 +Subject: [PATCH 047/100] scripts/update-linux-header.sh: be more src tree + friendly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [47/91] c4165cc8bf79c3f96912e8210b3bb3565add288f (bonzini/rhel-qemu-kvm) + +Running "install_headers" in the Linux source tree is fairly +unfriendly as out-of-tree builds will start complaining about the +kernel source being non-pristine. As we have a temporary directory for +the install we should also do the build step here. So now we have: + + $tmpdir/ + $blddir/ + $hdrdir/ + +Reviewed-by: Pierrick Bouvier +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Alex Bennée +Message-Id: <20240514174253.694591-3-alex.bennee@linaro.org> +(cherry picked from commit b51ddd937f11f76614d4b36d14d8778df242661c) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 80 +++++++++++++++++---------------- + 1 file changed, 41 insertions(+), 39 deletions(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 5f20434d5c..4431ba4d54 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -27,6 +27,8 @@ + # types like "__u64". 
This work is done in the cp_portable function. + + tmpdir=$(mktemp -d) ++hdrdir="$tmpdir/headers" ++blddir="$tmpdir/build" + linux="$1" + output="$2" + +@@ -111,56 +113,56 @@ for arch in $ARCHLIST; do + arch_var=ARCH + fi + +- make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install ++ make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install + + rm -rf "$output/linux-headers/asm-$arch" + mkdir -p "$output/linux-headers/asm-$arch" + for header in kvm.h unistd.h bitsperlong.h mman.h; do +- cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" + done + + if [ $arch = mips ]; then +- cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" + fi + if [ $arch = powerpc ]; then +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" +- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" + fi + + rm -rf "$output/include/standard-headers/asm-$arch" + mkdir -p "$output/include/standard-headers/asm-$arch" + if [ $arch = s390 ]; then +- cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" +- cp 
"$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" ++ cp_portable "$hdrdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" + fi + if [ $arch = arm ]; then +- cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" +- cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" +- cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + fi + if [ $arch = arm64 ]; then +- cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" ++ cp "$hdrdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" + fi + if [ $arch = x86 ]; then +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" +- cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" +- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" +- cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" ++ cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" ++ cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + # Remove everything except the macros from bootparam.h avoiding the + # unnecessary import of several video/ist/etc headers + sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ +- "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" +- cp_portable "$tmpdir/bootparam.h" \ ++ "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h" ++ cp_portable 
"$hdrdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" +- cp_portable "$tmpdir/include/asm/setup_data.h" \ ++ cp_portable "$hdrdir/include/asm/setup_data.h" \ + "$output/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then +- cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" ++ cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi + done + arch= +@@ -170,13 +172,13 @@ mkdir -p "$output/linux-headers/linux" + for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ + vduse.h iommufd.h bits.h; do +- cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" ++ cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux" + done + + rm -rf "$output/linux-headers/asm-generic" + mkdir -p "$output/linux-headers/asm-generic" + for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do +- cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" ++ cp "$hdrdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" + done + + if [ -L "$linux/source" ]; then +@@ -211,23 +213,23 @@ EOF + + rm -rf "$output/include/standard-headers/linux" + mkdir -p "$output/include/standard-headers/linux" +-for i in "$tmpdir"/include/linux/*virtio*.h \ +- "$tmpdir/include/linux/qemu_fw_cfg.h" \ +- "$tmpdir/include/linux/fuse.h" \ +- "$tmpdir/include/linux/input.h" \ +- "$tmpdir/include/linux/input-event-codes.h" \ +- "$tmpdir/include/linux/udmabuf.h" \ +- "$tmpdir/include/linux/pci_regs.h" \ +- "$tmpdir/include/linux/ethtool.h" \ +- "$tmpdir/include/linux/const.h" \ +- "$tmpdir/include/linux/kernel.h" \ +- "$tmpdir/include/linux/vhost_types.h" \ +- "$tmpdir/include/linux/sysinfo.h" \ +- "$tmpdir/include/misc/pvpanic.h"; do ++for i in "$hdrdir"/include/linux/*virtio*.h \ ++ "$hdrdir/include/linux/qemu_fw_cfg.h" \ ++ "$hdrdir/include/linux/fuse.h" \ ++ "$hdrdir/include/linux/input.h" \ ++ 
"$hdrdir/include/linux/input-event-codes.h" \ ++ "$hdrdir/include/linux/udmabuf.h" \ ++ "$hdrdir/include/linux/pci_regs.h" \ ++ "$hdrdir/include/linux/ethtool.h" \ ++ "$hdrdir/include/linux/const.h" \ ++ "$hdrdir/include/linux/kernel.h" \ ++ "$hdrdir/include/linux/vhost_types.h" \ ++ "$hdrdir/include/linux/sysinfo.h" \ ++ "$hdrdir/include/misc/pvpanic.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" + done + mkdir -p "$output/include/standard-headers/drm" +-cp_portable "$tmpdir/include/drm/drm_fourcc.h" \ ++cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ + "$output/include/standard-headers/drm" + + rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch b/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch new file mode 100644 index 0000000..eea324e --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch @@ -0,0 +1,38 @@ +From 5db9faee4d6efc9dbe010d2b745aba59d943d2ac Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Wed, 21 Feb 2024 10:51:38 -0600 +Subject: [PATCH 016/100] scripts/update-linux-headers: Add bits.h to file + imports + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [16/91] 150ee6376982bd5f471cb561f6760bf80d1211db (bonzini/rhel-qemu-kvm) + +Signed-off-by: Michael Roth +Signed-off-by: Paolo Bonzini +(cherry picked from commit b40b8eb609d3549ac14aab43849b20f5cba951c9) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index d48856f9e2..5f20434d5c 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -169,7 +169,7 @@ rm -rf 
"$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" + for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ +- vduse.h iommufd.h; do ++ vduse.h iommufd.h bits.h; do + cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" + done + +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch b/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch new file mode 100644 index 0000000..86c1f9c --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch @@ -0,0 +1,83 @@ +From 2d0989fe09703ef46ba9c5d14770dbf8a6fd2f80 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Sun, 18 Feb 2024 23:35:02 -0600 +Subject: [PATCH 015/100] scripts/update-linux-headers: Add setup_data.h to + import list + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [15/91] 9d46c8787259317710a84e7a6aa36731e9f55a17 (bonzini/rhel-qemu-kvm) + +Data structures like struct setup_data have been moved to a separate +setup_data.h header which bootparam.h relies on. Add setup_data.h to +the cp_portable() list and sync it along with the other header files. + +Note that currently struct setup_data is stripped away as part of +generating bootparam.h, but that handling is no currently needed for +setup_data.h since it doesn't pull in many external +headers/dependencies. However, QEMU currently redefines struct +setup_data in hw/i386/x86.c, so that will need to be removed as part of +any header update that pulls in the new setup_data.h to avoid build +bisect breakage. + +Because is the first architecture specific #include +in include/standard-headers/, add a new sed substitution to rewrite +asm/ include to the standard-headers/asm-* subdirectory for the current +architecture. 
+ +And while at it, remove asm-generic/kvm_para.h from the list of +allowed includes: it does not have a matching substitution, and therefore +it would not be possible to use it on non-Linux systems where there is +no /usr/include/asm-generic/ directory. + +Signed-off-by: Michael Roth +Signed-off-by: Paolo Bonzini +(cherry picked from commit 66210a1a30f2384bb59f9dad8d769dba56dd30f1) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index a0006eec6f..d48856f9e2 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -61,7 +61,7 @@ cp_portable() { + -e 'linux/const' \ + -e 'linux/kernel' \ + -e 'linux/sysinfo' \ +- -e 'asm-generic/kvm_para' \ ++ -e 'asm/setup_data.h' \ + > /dev/null + then + echo "Unexpected #include in input file $f". +@@ -77,6 +77,7 @@ cp_portable() { + -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ + -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ ++ -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ + -e 's/__bitwise//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ +@@ -155,11 +156,14 @@ for arch in $ARCHLIST; do + "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" + cp_portable "$tmpdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" ++ cp_portable "$tmpdir/include/asm/setup_data.h" \ ++ "$output/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then + cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi + done ++arch= + + rm -rf "$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch b/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch new file mode 100644 index 0000000..ecd631a --- 
/dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch @@ -0,0 +1,47 @@ +From 09acdbc49a4dd85d82ad30ec2859edfcdba8431e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 May 2024 08:01:26 +0200 +Subject: [PATCH 049/100] scripts/update-linux-headers.sh: Fix the path of + setup_data.h + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [49/91] f3008bc07796687c9440f5720fbc72a12d0a1278 (bonzini/rhel-qemu-kvm) + +When running the update-linx-headers.sh script, it currently fails with: + +scripts/update-linux-headers.sh: line 73: .../qemu/standard-headers/asm-x86/setup_data.h: No such file or directory + +The "include" folder is obviously missing here - no clue how this could +have worked before? + +Fixes: 66210a1a30 ("scripts/update-linux-headers: Add setup_data.h to import list") +Message-ID: <20240527060126.12578-1-thuth@redhat.com> +Reviewed-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit bde26d90ae9f7551cac90e117fc7216c807a3bfe) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 0f404d5317..f084bee72e 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -160,7 +160,7 @@ for arch in $ARCHLIST; do + cp_portable "$hdrdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" + cp_portable "$hdrdir/include/asm/setup_data.h" \ +- "$output/standard-headers/asm-x86" ++ "$output/include/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then + cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch 
b/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch new file mode 100644 index 0000000..c7dbd3a --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch @@ -0,0 +1,44 @@ +From 8e63d742015bf69a00fd44e88eb1198f594b2de2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 May 2024 08:02:43 +0200 +Subject: [PATCH 048/100] scripts/update-linux-headers.sh: Remove temporary + directory inbetween + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [48/91] 879554dc7e722c4e20b302a00ca745ddeefdc0fb (bonzini/rhel-qemu-kvm) + +We are reusing the same temporary directory for installing the headers +of all targets, so there could be stale files here when switching from +one target to another. Make sure to delete the folder before installing +a new set of target headers into it. + +Message-ID: <20240527060243.12647-1-thuth@redhat.com> +Reviewed-by: Michael S. 
Tsirkin +Acked-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit 3efc75ad9d9317e5709861bbebb2c29390f8e7a2) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 4431ba4d54..0f404d5317 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -113,6 +113,7 @@ for arch in $ARCHLIST; do + arch_var=ARCH + fi + ++ rm -rf "$hdrdir" + make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install + + rm -rf "$output/linux-headers/asm-$arch" +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch b/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch new file mode 100644 index 0000000..a7eebbc --- /dev/null +++ b/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch @@ -0,0 +1,60 @@ +From d580b83d9eda7802ffa3890ea8641793fe78937c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:05 +0200 +Subject: [PATCH 094/100] scsi-block: Don't skip callback for sgio error + status/driver_status + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 1fee1b21dae314f4f34c88f2d2fabd7af011404a (kmwolf/centos-qemu-kvm) + +Instead of calling into scsi_handle_rw_error() directly from +scsi_block_sgio_complete() and skipping the normal callback, go through +the normal cleanup path by calling the callback with a positive error +value. + +The important difference here is not only that the code path is cleaner, +but that the callbacks set r->req.aiocb = NULL. 
If we skip setting this +and the error action is BLOCK_ERROR_ACTION_STOP, resuming the VM runs +into an assertion failure in scsi_read_data() or scsi_write_data() +because the dangling aiocb pointer is unexpected. + +Fixes: a108557bbf ("scsi: inline sg_io_sense_from_errno() into the callers.") +Buglink: https://issues.redhat.com/browse/RHEL-50000 +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 622a70161ac258e4a166a7dca4b5be267e0652d9) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index bed2c8746c..e7f57f3230 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2804,16 +2804,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + } else { + ret = io_hdr->status; + } +- +- if (ret > 0) { +- if (scsi_handle_rw_error(r, ret, true)) { +- scsi_req_unref(&r->req); +- return; +- } +- +- /* Ignore error. */ +- ret = 0; +- } + } + + req->cb(req->cb_opaque, ret); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch b/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch deleted file mode 100644 index 11dda3a..0000000 --- a/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 5dd7d26c034c26b2d4d9b91b8d1a7b605e19730f Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:51 +0200 -Subject: [PATCH 02/12] scsi: cleanup scsi_clear_unit_attention() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/3] b3a06a91644e44fae3d76d0fbe72448652db517a (sgarzarella/qemu-kvm-c-9-s) - -The previous commit moved the unit attention clearing when we create -the request. 
So now we can clean scsi_clear_unit_attention() to handle -only the case of the REPORT LUNS command: this is the only case in -which a UNIT ATTENTION is cleared without having been reported. - -Suggested-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-3-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit ba947dab98e7cd4337c70975bd255701a2a6aad8) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 28 ++++++---------------------- - 1 file changed, 6 insertions(+), 22 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index 5d22313b9d..cecd26479e 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -828,26 +828,12 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - return; - } - -- if (req->dev->unit_attention.key != UNIT_ATTENTION && -- req->bus->unit_attention.key != UNIT_ATTENTION) { -- return; -- } -- -- /* -- * If an INQUIRY command enters the enabled command state, -- * the device server shall [not] clear any unit attention condition; -- * See also MMC-6, paragraphs 6.5 and 6.6.2. -- */ -- if (req->cmd.buf[0] == INQUIRY || -- req->cmd.buf[0] == GET_CONFIGURATION || -- req->cmd.buf[0] == GET_EVENT_STATUS_NOTIFICATION) { -- return; -- } -- - if (req->dev->unit_attention.key == UNIT_ATTENTION) { - ua = &req->dev->unit_attention; -- } else { -+ } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { - ua = &req->bus->unit_attention; -+ } else { -+ return; - } - - /* -@@ -856,12 +842,10 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. 
- */ - if (req->cmd.buf[0] == REPORT_LUNS && -- !(ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -- ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq)) { -- return; -+ ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -+ ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { -+ *ua = SENSE_CODE(NO_SENSE); - } -- -- *ua = SENSE_CODE(NO_SENSE); - } - - int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len) --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch b/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch deleted file mode 100644 index cb3b24e..0000000 --- a/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 0a784c45a7b7ee32c36bf86eebb24c8431a89f49 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:52 +0200 -Subject: [PATCH 03/12] scsi: clear unit attention only for REPORT LUNS - commands - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/3] 01d5e112ef9ae204d96ceb01b4a453fdb4e8b669 (sgarzarella/qemu-kvm-c-9-s) - -scsi_clear_unit_attention() now only handles REPORTED LUNS DATA HAS -CHANGED. - -This only happens when we handle REPORT LUNS commands, so let's rename -the function in scsi_clear_reported_luns_changed() and call it only in -scsi_target_emulate_report_luns(). 
- -Suggested-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-4-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 2eb5599e8a73e70a9e86a97120818ff95a43a23a) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 34 +++++++++++----------------------- - 1 file changed, 11 insertions(+), 23 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index cecd26479e..9542410800 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -22,6 +22,7 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev); - static void scsi_req_dequeue(SCSIRequest *req); - static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len); - static void scsi_target_free_buf(SCSIRequest *req); -+static void scsi_clear_reported_luns_changed(SCSIRequest *req); - - static int next_scsi_bus; - -@@ -518,6 +519,14 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r) - - /* store the LUN list length */ - stl_be_p(&r->buf[0], len - 8); -+ -+ /* -+ * If a REPORT LUNS command enters the enabled command state, [...] -+ * the device server shall clear any pending unit attention condition -+ * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. -+ */ -+ scsi_clear_reported_luns_changed(&r->req); -+ - return true; - } - -@@ -816,18 +825,10 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) - return req->ops->get_buf(req); - } - --static void scsi_clear_unit_attention(SCSIRequest *req) -+static void scsi_clear_reported_luns_changed(SCSIRequest *req) - { - SCSISense *ua; - -- /* -- * scsi_fetch_unit_attention_sense() already cleaned the unit attention -- * in this case. 
-- */ -- if (req->ops == &reqops_unit_attention) { -- return; -- } -- - if (req->dev->unit_attention.key == UNIT_ATTENTION) { - ua = &req->dev->unit_attention; - } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { -@@ -836,13 +837,7 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - return; - } - -- /* -- * If a REPORT LUNS command enters the enabled command state, [...] -- * the device server shall clear any pending unit attention condition -- * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. -- */ -- if (req->cmd.buf[0] == REPORT_LUNS && -- ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -+ if (ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && - ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { - *ua = SENSE_CODE(NO_SENSE); - } -@@ -1528,13 +1523,6 @@ void scsi_req_complete(SCSIRequest *req, int status) - req->dev->sense_is_ua = false; - } - -- /* -- * Unit attention state is now stored in the device's sense buffer -- * if the HBA didn't do autosense. Clear the pending unit attention -- * flags. 
-- */ -- scsi_clear_unit_attention(req); -- - scsi_req_ref(req); - scsi_req_dequeue(req); - req->bus->info->complete(req, req->residual); --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch b/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch new file mode 100644 index 0000000..20aa88a --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch @@ -0,0 +1,79 @@ +From eebe5fe8cbc854a6365e7c1adbb701079b137bcb Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:06 +0200 +Subject: [PATCH 095/100] scsi-disk: Add warning comments that host_status + errors take a shortcut + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 6fcd603fc78fda65a425a1acd9a8710d81c6ed7f (kmwolf/centos-qemu-kvm) + +scsi_block_sgio_complete() has surprising behaviour in that there are +error cases in which it directly completes the request and never calls +the passed callback. In the current state of the code, this doesn't seem +to result in bugs, but with future code changes, we must be careful to +never rely on the callback doing some cleanup until this code smell is +fixed. For now, just add warnings to make people aware of the trap. + +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 8a0495624f23f8f01dfb1484f367174f3b3572e8) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index e7f57f3230..b4062ac2ff 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -65,6 +65,9 @@ struct SCSIDiskClass { + /* + * Callbacks receive ret == 0 for success. 
Errors are represented either as + * negative errno values, or as positive SAM status codes. ++ * ++ * Beware: For errors returned in host_status, the function may directly ++ * complete the request and never call the callback. + */ + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; +@@ -359,6 +362,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -399,6 +403,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_read_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -538,6 +543,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_write_complete(void * opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -2793,6 +2799,7 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + sg_io_hdr_t *io_hdr = &req->io_header; + + if (ret == 0) { ++ /* FIXME This skips calling req->cb() and any cleanup in it */ + if (io_hdr->host_status != SCSI_HOST_OK) { + scsi_req_complete_failed(&r->req, io_hdr->host_status); + scsi_req_unref(&r->req); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch b/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch new file mode 100644 index 0000000..0e2aeaf --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch @@ -0,0 +1,106 @@ +From bd5cace452183053e356a27317c759ecfe0391aa Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:07 +0200 +Subject: [PATCH 096/100] scsi-disk: Always report RESERVATION_CONFLICT to + guest + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: 
Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] eb4142071e5cbe385a949a6c48b0c8f8c6086918 (kmwolf/centos-qemu-kvm) + +In the case of scsi-block, RESERVATION_CONFLICT is not a backend error, +but indicates that the guest tried to make a request that it isn't +allowed to execute. Pass the error to the guest so that it can decide +what to do with it. + +Without this, if we stop the VM in response to a RESERVATION_CONFLICT +(as is the default policy in management software such as oVirt or +KubeVirt), it can happen that the VM cannot be resumed any more because +every attempt to resume it immediately runs into the same error and +stops the VM again. + +One case that expects RESERVATION_CONFLICT errors to be visible in the +guest is running the validation tests in Windows 2019's Failover Cluster +Manager, which intentionally tries to execute invalid requests to see if +they are properly rejected. + +Buglink: https://issues.redhat.com/browse/RHEL-50000 +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-5-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9da6bd39f92434f55573acd017841b195c60188f) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index b4062ac2ff..91ccf37fef 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -202,7 +202,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + SCSISense sense = SENSE_CODE(NO_SENSE); +- int error = 0; ++ int error; + bool req_has_sense = false; + BlockErrorAction action; + int status; +@@ -213,11 +213,35 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + } else { + /* A passthrough command has 
completed with nonzero status. */ + status = ret; +- if (status == CHECK_CONDITION) { ++ switch (status) { ++ case CHECK_CONDITION: + req_has_sense = true; + error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); +- } else { ++ break; ++ case RESERVATION_CONFLICT: ++ /* ++ * Don't apply the error policy, always report to the guest. ++ * ++ * This is a passthrough code path, so it's not a backend error, but ++ * a response to an invalid guest request. ++ * ++ * Windows Failover Cluster validation intentionally sends invalid ++ * requests to verify that reservations work as intended. It is ++ * crucial that it sees the resulting errors. ++ * ++ * Treating a reservation conflict as a guest-side error is obvious ++ * when a pr-manager is in use. Without one, the situation is less ++ * clear, but there might be nothing that can be fixed on the host ++ * (like in the above example), and we don't want to be stuck in a ++ * loop where resuming the VM and retrying the request immediately ++ * stops it again. So always reporting is still the safer option in ++ * this case, too. ++ */ ++ error = 0; ++ break; ++ default: + error = EINVAL; ++ break; + } + } + +@@ -227,8 +251,9 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + * are usually retried immediately, so do not post them to QMP and + * do not account them as failed I/O. 
+ */ +- if (req_has_sense && +- scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { ++ if (!error || (req_has_sense && ++ scsi_sense_buf_is_guest_recoverable(r->req.sense, ++ sizeof(r->req.sense)))) { + action = BLOCK_ERROR_ACTION_REPORT; + acct_failed = false; + } else { +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch b/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch new file mode 100644 index 0000000..409028a --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch @@ -0,0 +1,125 @@ +From 1a0aa9bbdad63d72628002740410b8a28282a96e Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:04 +0200 +Subject: [PATCH 093/100] scsi-disk: Use positive return value for status in + dma_readv/writev + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] a0b3e7bfd7b7059c0ec3706f2eb1698c1d430b08 (kmwolf/centos-qemu-kvm) + +In some error cases, scsi_block_sgio_complete() never calls the passed +callback, but directly completes the request. This leads to bugs because +its error paths are not exact copies of what the callback would normally +do. + +In preparation to fix this, allow passing positive return values to the +callbacks that represent the status code that should be used to complete +the request. + +scsi_handle_rw_error() already handles positive values for its ret +parameter because scsi_block_sgio_complete() calls directly into it. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit cfe0880835cd364b590ffd27ef8dbd2ad8838bc5) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 4bd7af9d0c..bed2c8746c 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -62,6 +62,10 @@ OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE) + + struct SCSIDiskClass { + SCSIDeviceClass parent_class; ++ /* ++ * Callbacks receive ret == 0 for success. Errors are represented either as ++ * negative errno values, or as positive SAM status codes. ++ */ + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; + bool (*need_fua_emulation)(SCSICommand *cmd); +@@ -261,7 +265,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return true; + } + +- if (ret < 0) { ++ if (ret != 0) { + return scsi_handle_rw_error(r, ret, acct_failed); + } + +@@ -338,7 +342,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r) + static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) + { + assert(r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -363,9 +367,10 @@ static void scsi_dma_complete(void *opaque, int ret) + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_dma_complete_noio(r, ret); +@@ -381,7 +386,7 @@ static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) + qemu_get_current_aio_context()); + + assert(r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, 
false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -402,9 +407,10 @@ static void scsi_read_complete(void *opaque, int ret) + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); + } +@@ -512,7 +518,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) + qemu_get_current_aio_context()); + + assert (r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -540,9 +546,10 @@ static void scsi_write_complete(void * opaque, int ret) + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_write_complete_noio(r, ret); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch b/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch deleted file mode 100644 index a41ae82..0000000 --- a/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 562ea3a2d602cf41c548f3ddf52c43c04fded347 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:50 +0200 -Subject: [PATCH 01/12] scsi: fetch unit attention when creating the request - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/3] 
04563caac45d0110ea65eda8e55472556cd317c0 (sgarzarella/qemu-kvm-c-9-s) - -Commit 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") split -calls to scsi_req_new() and scsi_req_enqueue() in the virtio-scsi device. -No ill effects were observed until commit 8cc5583abe ("virtio-scsi: Send -"REPORTED LUNS CHANGED" sense data upon disk hotplug events") added a -unit attention that was easy to trigger with device hotplug and -hot-unplug. - -Because the two calls were separated, all requests in the batch were -prepared calling scsi_req_new() to report a sense. The first one -submitted would report the right sense and reset it to NO_SENSE, while -the others reported CHECK_CONDITION with no sense data. This caused -SCSI errors in Linux. - -To solve this issue, let's fetch the unit attention as early as possible -when we prepare the request, so that only the first request in the batch -will use the unit attention SCSIReqOps and the others will not report -CHECK CONDITION. - -Fixes: 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") -Fixes: 8cc5583abe ("virtio-scsi: Send "REPORTED LUNS CHANGED" sense data upon disk hotplug events") -Reported-by: Thomas Huth -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2176702 -Co-developed-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-2-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9472083e642bfb9bc836b38662baddd9bc964ebc) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 36 +++++++++++++++++++++++++++++++++--- - include/hw/scsi/scsi.h | 1 + - 2 files changed, 34 insertions(+), 3 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index 3c20b47ad0..5d22313b9d 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -413,19 +413,35 @@ static const struct SCSIReqOps reqops_invalid_opcode = { - - /* SCSIReqOps implementation for unit attention conditions. 
*/ - --static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) -+static void scsi_fetch_unit_attention_sense(SCSIRequest *req) - { -+ SCSISense *ua = NULL; -+ - if (req->dev->unit_attention.key == UNIT_ATTENTION) { -- scsi_req_build_sense(req, req->dev->unit_attention); -+ ua = &req->dev->unit_attention; - } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { -- scsi_req_build_sense(req, req->bus->unit_attention); -+ ua = &req->bus->unit_attention; - } -+ -+ /* -+ * Fetch the unit attention sense immediately so that another -+ * scsi_req_new does not use reqops_unit_attention. -+ */ -+ if (ua) { -+ scsi_req_build_sense(req, *ua); -+ *ua = SENSE_CODE(NO_SENSE); -+ } -+} -+ -+static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) -+{ - scsi_req_complete(req, CHECK_CONDITION); - return 0; - } - - static const struct SCSIReqOps reqops_unit_attention = { - .size = sizeof(SCSIRequest), -+ .init_req = scsi_fetch_unit_attention_sense, - .send_command = scsi_unit_attention - }; - -@@ -699,6 +715,11 @@ SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d, - object_ref(OBJECT(d)); - object_ref(OBJECT(qbus->parent)); - notifier_list_init(&req->cancel_notifiers); -+ -+ if (reqops->init_req) { -+ reqops->init_req(req); -+ } -+ - trace_scsi_req_alloc(req->dev->id, req->lun, req->tag); - return req; - } -@@ -798,6 +819,15 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) - static void scsi_clear_unit_attention(SCSIRequest *req) - { - SCSISense *ua; -+ -+ /* -+ * scsi_fetch_unit_attention_sense() already cleaned the unit attention -+ * in this case. 
-+ */ -+ if (req->ops == &reqops_unit_attention) { -+ return; -+ } -+ - if (req->dev->unit_attention.key != UNIT_ATTENTION && - req->bus->unit_attention.key != UNIT_ATTENTION) { - return; -diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h -index 6f23a7a73e..1787ddd01e 100644 ---- a/include/hw/scsi/scsi.h -+++ b/include/hw/scsi/scsi.h -@@ -108,6 +108,7 @@ int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num); - /* scsi-bus.c */ - struct SCSIReqOps { - size_t size; -+ void (*init_req)(SCSIRequest *req); - void (*free_req)(SCSIRequest *req); - int32_t (*send_command)(SCSIRequest *req, uint8_t *buf); - void (*read_data)(SCSIRequest *req); --- -2.39.3 - diff --git a/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch b/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch deleted file mode 100644 index f1de158..0000000 --- a/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch +++ /dev/null @@ -1,248 +0,0 @@ -From 00f6e941e75f378c84c773a15efde7dd085d9ce3 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 19:40:14 +0100 -Subject: [PATCH 21/56] spice: move client_migrate_info command to ui/ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [20/50] a587bb001b51a1f9fdf2fcfb0978bb931ae443b6 (peterx/qemu-kvm) - -It has nothing to do with migration, except for the "migrate" in the -name of the command. Move it with the rest of the ui commands. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit f9e1ef7482f1ee289b04f4b45702a1701bc8929d) -Signed-off-by: Peter Xu ---- - migration/migration-hmp-cmds.c | 17 ----------------- - migration/migration.c | 30 ------------------------------ - qapi/migration.json | 28 ---------------------------- - qapi/ui.json | 28 ++++++++++++++++++++++++++++ - ui/ui-hmp-cmds.c | 17 +++++++++++++++++ - ui/ui-qmp-cmds.c | 29 +++++++++++++++++++++++++++++ - 6 files changed, 74 insertions(+), 75 deletions(-) - -diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c -index 71da91967a..4e9f00e7dc 100644 ---- a/migration/migration-hmp-cmds.c -+++ b/migration/migration-hmp-cmds.c -@@ -636,23 +636,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - hmp_handle_error(mon, err); - } - --void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) --{ -- Error *err = NULL; -- const char *protocol = qdict_get_str(qdict, "protocol"); -- const char *hostname = qdict_get_str(qdict, "hostname"); -- bool has_port = qdict_haskey(qdict, "port"); -- int port = qdict_get_try_int(qdict, "port", -1); -- bool has_tls_port = qdict_haskey(qdict, "tls-port"); -- int tls_port = qdict_get_try_int(qdict, "tls-port", -1); -- const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); -- -- qmp_client_migrate_info(protocol, hostname, -- has_port, port, has_tls_port, tls_port, -- cert_subject, &err); -- hmp_handle_error(mon, err); --} -- - void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) - { - Error *err = NULL; -diff --git a/migration/migration.c b/migration/migration.c -index aa96ffdc5b..b745d829a4 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -63,7 +63,6 @@ - #include "sysemu/cpus.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" --#include "ui/qemu-spice.h" - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - -@@ -1018,35 +1017,6 @@ 
MigrationParameters *qmp_query_migrate_parameters(Error **errp) - return params; - } - --void qmp_client_migrate_info(const char *protocol, const char *hostname, -- bool has_port, int64_t port, -- bool has_tls_port, int64_t tls_port, -- const char *cert_subject, -- Error **errp) --{ -- if (strcmp(protocol, "spice") == 0) { -- if (!qemu_using_spice(errp)) { -- return; -- } -- -- if (!has_port && !has_tls_port) { -- error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); -- return; -- } -- -- if (qemu_spice.migrate_info(hostname, -- has_port ? port : -1, -- has_tls_port ? tls_port : -1, -- cert_subject)) { -- error_setg(errp, "Could not set up display for migration"); -- return; -- } -- return; -- } -- -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); --} -- - AnnounceParameters *migrate_announce_params(void) - { - static AnnounceParameters ap; -diff --git a/qapi/migration.json b/qapi/migration.json -index c84fa10e86..2c35b7b9cf 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -1203,34 +1203,6 @@ - { 'command': 'query-migrate-parameters', - 'returns': 'MigrationParameters' } - --## --# @client_migrate_info: --# --# Set migration information for remote display. This makes the server --# ask the client to automatically reconnect using the new parameters --# once migration finished successfully. Only implemented for SPICE. 
--# --# @protocol: must be "spice" --# @hostname: migration target hostname --# @port: spice tcp port for plaintext channels --# @tls-port: spice tcp port for tls-secured channels --# @cert-subject: server certificate subject --# --# Since: 0.14 --# --# Example: --# --# -> { "execute": "client_migrate_info", --# "arguments": { "protocol": "spice", --# "hostname": "virt42.lab.kraxel.org", --# "port": 1234 } } --# <- { "return": {} } --# --## --{ 'command': 'client_migrate_info', -- 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', -- '*tls-port': 'int', '*cert-subject': 'str' } } -- - ## - # @migrate-start-postcopy: - # -diff --git a/qapi/ui.json b/qapi/ui.json -index 98322342f7..7ddd27a932 100644 ---- a/qapi/ui.json -+++ b/qapi/ui.json -@@ -1554,3 +1554,31 @@ - { 'command': 'display-update', - 'data': 'DisplayUpdateOptions', - 'boxed' : true } -+ -+## -+# @client_migrate_info: -+# -+# Set migration information for remote display. This makes the server -+# ask the client to automatically reconnect using the new parameters -+# once migration finished successfully. Only implemented for SPICE. 
-+# -+# @protocol: must be "spice" -+# @hostname: migration target hostname -+# @port: spice tcp port for plaintext channels -+# @tls-port: spice tcp port for tls-secured channels -+# @cert-subject: server certificate subject -+# -+# Since: 0.14 -+# -+# Example: -+# -+# -> { "execute": "client_migrate_info", -+# "arguments": { "protocol": "spice", -+# "hostname": "virt42.lab.kraxel.org", -+# "port": 1234 } } -+# <- { "return": {} } -+# -+## -+{ 'command': 'client_migrate_info', -+ 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', -+ '*tls-port': 'int', '*cert-subject': 'str' } } -diff --git a/ui/ui-hmp-cmds.c b/ui/ui-hmp-cmds.c -index 5c456ecc02..c671389473 100644 ---- a/ui/ui-hmp-cmds.c -+++ b/ui/ui-hmp-cmds.c -@@ -458,3 +458,20 @@ hmp_screendump(Monitor *mon, const QDict *qdict) - end: - hmp_handle_error(mon, err); - } -+ -+void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) -+{ -+ Error *err = NULL; -+ const char *protocol = qdict_get_str(qdict, "protocol"); -+ const char *hostname = qdict_get_str(qdict, "hostname"); -+ bool has_port = qdict_haskey(qdict, "port"); -+ int port = qdict_get_try_int(qdict, "port", -1); -+ bool has_tls_port = qdict_haskey(qdict, "tls-port"); -+ int tls_port = qdict_get_try_int(qdict, "tls-port", -1); -+ const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); -+ -+ qmp_client_migrate_info(protocol, hostname, -+ has_port, port, has_tls_port, tls_port, -+ cert_subject, &err); -+ hmp_handle_error(mon, err); -+} -diff --git a/ui/ui-qmp-cmds.c b/ui/ui-qmp-cmds.c -index dbc4afcd73..a37a7024f3 100644 ---- a/ui/ui-qmp-cmds.c -+++ b/ui/ui-qmp-cmds.c -@@ -175,3 +175,32 @@ void qmp_display_update(DisplayUpdateOptions *arg, Error **errp) - abort(); - } - } -+ -+void qmp_client_migrate_info(const char *protocol, const char *hostname, -+ bool has_port, int64_t port, -+ bool has_tls_port, int64_t tls_port, -+ const char *cert_subject, -+ Error **errp) -+{ -+ if (strcmp(protocol, "spice") == 0) { -+ if 
(!qemu_using_spice(errp)) { -+ return; -+ } -+ -+ if (!has_port && !has_tls_port) { -+ error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); -+ return; -+ } -+ -+ if (qemu_spice.migrate_info(hostname, -+ has_port ? port : -1, -+ has_tls_port ? tls_port : -1, -+ cert_subject)) { -+ error_setg(errp, "Could not set up display for migration"); -+ return; -+ } -+ return; -+ } -+ -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); -+} --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch b/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch deleted file mode 100644 index 43c239a..0000000 --- a/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch +++ /dev/null @@ -1,203 +0,0 @@ -From 03011d00cfb5862edb7394a9b79b269198af5c89 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:48:34 -0400 -Subject: [PATCH 7/7] target/i386: Add EPYC-Genoa model to support Zen 4 - processor series - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/7] 158091c691169a5d30c7c8005371ee7a0d9fc4ce (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 166b1741884dd4fd7090b753cd7333868457a29b -Author: Babu Moger -Date: Thu May 4 15:53:12 2023 -0500 - - target/i386: Add EPYC-Genoa model to support Zen 4 processor series - - Adds the support for AMD EPYC Genoa generation processors. The model - display for the new processor will be EPYC-Genoa. - - Adds the following new feature bits on top of the feature bits from - the previous generation EPYC models. 
- - avx512f : AVX-512 Foundation instruction - avx512dq : AVX-512 Doubleword & Quadword Instruction - avx512ifma : AVX-512 Integer Fused Multiply Add instruction - avx512cd : AVX-512 Conflict Detection instruction - avx512bw : AVX-512 Byte and Word Instructions - avx512vl : AVX-512 Vector Length Extension Instructions - avx512vbmi : AVX-512 Vector Byte Manipulation Instruction - avx512_vbmi2 : AVX-512 Additional Vector Byte Manipulation Instruction - gfni : AVX-512 Galois Field New Instructions - avx512_vnni : AVX-512 Vector Neural Network Instructions - avx512_bitalg : AVX-512 Bit Algorithms, add bit algorithms Instructions - avx512_vpopcntdq: AVX-512 AVX-512 Vector Population Count Doubleword and - Quadword Instructions - avx512_bf16 : AVX-512 BFLOAT16 instructions - la57 : 57-bit virtual address support (5-level Page Tables) - vnmi : Virtual NMI (VNMI) allows the hypervisor to inject the NMI - into the guest without using Event Injection mechanism - meaning not required to track the guest NMI and intercepting - the IRET. - auto-ibrs : The AMD Zen4 core supports a new feature called Automatic IBRS. - It is a "set-and-forget" feature that means that, unlike e.g., - s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation - resources automatically across CPL transitions. 
- - Signed-off-by: Babu Moger - Message-Id: <20230504205313.225073-8-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 122 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index f1baefe775..b27db050a2 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1973,6 +1973,56 @@ static const CPUCaches epyc_milan_v2_cache_info = { - }, - }; - -+static const CPUCaches epyc_genoa_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 1 * MiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 2048, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4493,6 +4543,78 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .name = "EPYC-Genoa", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 25, -+ .model = 17, -+ .stepping = 0, -+ .features[FEAT_1_EDX] = -+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | -+ CPUID_PSE36 | 
CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | -+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | -+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | -+ CPUID_VME | CPUID_FP87, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | -+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | -+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | -+ CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | -+ CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | -+ CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | -+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | -+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | -+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, -+ .features[FEAT_8000_0008_EBX] = -+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | -+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | -+ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | -+ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | -+ CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, -+ .features[FEAT_8000_0021_EAX] = -+ CPUID_8000_0021_EAX_No_NESTED_DATA_BP | -+ CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | -+ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | -+ CPUID_8000_0021_EAX_AUTO_IBRS, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | -+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | -+ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | -+ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | -+ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | -+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, -+ 
.features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | -+ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | -+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | -+ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | -+ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | -+ CPUID_7_0_ECX_RDPID, -+ .features[FEAT_7_0_EDX] = -+ CPUID_7_0_EDX_FSRM, -+ .features[FEAT_7_1_EAX] = -+ CPUID_7_1_EAX_AVX512_BF16, -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .features[FEAT_SVM] = -+ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | -+ CPUID_SVM_SVME_ADDR_CHK, -+ .xlevel = 0x80000022, -+ .model_id = "AMD EPYC-Genoa Processor", -+ .cache_info = &epyc_genoa_cache_info, -+ }, - }; - - /* --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch b/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch deleted file mode 100644 index 5e8f79b..0000000 --- a/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 95c5cee20741b055dea9ac3ad3176bbaa1eaf705 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:46:25 -0400 -Subject: [PATCH 6/7] target/i386: Add VNMI and automatic IBRS feature bits -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/7] 24c0fb08973aa2615817f67576550ce2efadb75c (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 62a798d4bc2c3e767d94670776c77a7df274d7c5 -Author: Babu Moger -Date: Thu May 4 15:53:11 2023 -0500 - - target/i386: Add VNMI and automatic IBRS feature bits - - Add the following featute bits. 
- - vnmi: Virtual NMI (VNMI) allows the hypervisor to inject the NMI into the - guest without using Event Injection mechanism meaning not required to - track the guest NMI and intercepting the IRET. - The presence of this feature is indicated via the CPUID function - 0x8000000A_EDX[25]. - - automatic-ibrs : - The AMD Zen4 core supports a new feature called Automatic IBRS. - It is a "set-and-forget" feature that means that, unlike e.g., - s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation - resources automatically across CPL transitions. - The presence of this feature is indicated via the CPUID function - 0x80000021_EAX[8]. - - The documention for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Santosh Shukla - Signed-off-by: Kim Phillips - Signed-off-by: Babu Moger - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-7-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - target/i386/cpu.h | 3 +++ - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index bbddc682df..f1baefe775 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -806,7 +806,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "pfthreshold", "avic", NULL, "v-vmsave-vmload", - "vgif", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ NULL, "vnmi", NULL, NULL, - "svme-addr-chk", NULL, NULL, NULL, - }, - .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, -@@ -925,7 +925,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", 
NULL, - NULL, NULL, "null-sel-clr-base", NULL, -- NULL, NULL, NULL, NULL, -+ "auto-ibrs", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index c37abf62ae..f7d225e4f1 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -773,6 +773,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_SVM_AVIC (1U << 13) - #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) - #define CPUID_SVM_VGIF (1U << 16) -+#define CPUID_SVM_VNMI (1U << 25) - #define CPUID_SVM_SVME_ADDR_CHK (1U << 28) - - /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ -@@ -948,6 +949,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) - /* Null Selector Clears Base */ - #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) -+/* Automatic IBRS */ -+#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch b/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch deleted file mode 100644 index 772bbbd..0000000 --- a/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 2d7fb99c02a7666f1d8fe70a4749f0b7771a68ed Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:29:55 -0400 -Subject: [PATCH 3/7] target/i386: Add a couple of feature bits in - 8000_0008_EBX - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/7] b11020b249d4ecc2e3e1ddf4fdc4b52c42ec2642 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit bb039a230e6a7920d71d21fa9afee2653a678c48 -Author: Babu Moger -Date: Thu May 4 15:53:08 2023 -0500 - 
- target/i386: Add a couple of feature bits in 8000_0008_EBX - - Add the following feature bits. - - amd-psfd : Predictive Store Forwarding Disable: - PSF is a hardware-based micro-architectural optimization - designed to improve the performance of code execution by - predicting address dependencies between loads and stores. - While SSBD (Speculative Store Bypass Disable) disables both - PSF and speculative store bypass, PSFD only disables PSF. - PSFD may be desirable for the software which is concerned - with the speculative behavior of PSF but desires a smaller - performance impact than setting SSBD. - Depends on the following kernel commit: - b73a54321ad8 ("KVM: x86: Expose Predictive Store Forwarding Disable") - - stibp-always-on : - Single Thread Indirect Branch Prediction mode has enhanced - performance and may be left always on. - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Message-Id: <20230504205313.225073-4-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - target/i386/cpu.h | 4 ++++ - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 8aa7eb611c..c8f88aefc7 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -911,10 +911,10 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - NULL, NULL, NULL, NULL, - NULL, "wbnoinvd", NULL, NULL, - "ibpb", NULL, "ibrs", "amd-stibp", -- NULL, NULL, NULL, NULL, -+ NULL, "stibp-always-on", NULL, NULL, - NULL, NULL, NULL, NULL, - "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, -- NULL, NULL, NULL, NULL, -+ "amd-psfd", NULL, NULL, NULL, - }, - .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, - .tcg_features = 0, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index c28b9df217..81d2200543 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -934,8 +934,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_8000_0008_EBX_IBRS (1U << 14) - /* Single Thread Indirect Branch Predictors */ - #define CPUID_8000_0008_EBX_STIBP (1U << 15) -+/* STIBP mode has enhanced performance and may be left always on */ -+#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) - /* Speculative Store Bypass Disable */ - #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) -+/* Predictive Store Forwarding Disable */ -+#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch b/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch deleted file mode 100644 index c714e49..0000000 --- 
a/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 2a2f74c53258ef67034307b59afe2f4c679afaa2 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:32:00 -0400 -Subject: [PATCH 4/7] target/i386: Add feature bits for CPUID_Fn80000021_EAX -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/7] 133044a7245226308406a684a875e1f96a394516 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit b70eec312b185197d639bff689007727e596afd1 -Author: Babu Moger -Date: Thu May 4 15:53:09 2023 -0500 - - target/i386: Add feature bits for CPUID_Fn80000021_EAX - - Add the following feature bits. - no-nested-data-bp : Processor ignores nested data breakpoints. - lfence-always-serializing : LFENCE instruction is always serializing. - null-sel-cls-base : Null Selector Clears Base. When this bit is - set, a null segment load clears the segment base. - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-5-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 24 ++++++++++++++++++++++++ - target/i386/cpu.h | 8 ++++++++ - 2 files changed, 32 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index c8f88aefc7..7ddebbaa3c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -920,6 +920,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .tcg_features = 0, - .unmigratable_flags = 0, - }, -+ [FEAT_8000_0021_EAX] = { -+ .type = CPUID_FEATURE_WORD, -+ .feat_names = { -+ "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, -+ NULL, NULL, "null-sel-clr-base", NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, -+ .tcg_features = 0, -+ .unmigratable_flags = 0, -+ }, - [FEAT_XSAVE] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { -@@ -6156,6 +6172,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; -+ case 0x80000021: -+ *eax = env->features[FEAT_8000_0021_EAX]; -+ *ebx = *ecx = *edx = 0; -+ break; - default: - /* reserved values: zero */ - *eax = 0; -@@ -6585,6 +6605,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); - } - -+ if (env->features[FEAT_8000_0021_EAX]) { -+ x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); -+ } -+ - /* SGX requires CPUID[0x12] for EPC enumeration */ - if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { - x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); -diff --git a/target/i386/cpu.h 
b/target/i386/cpu.h -index 81d2200543..c37abf62ae 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -600,6 +600,7 @@ typedef enum FeatureWord { - FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ - FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ - FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ -+ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ - FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ - FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ - FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ -@@ -941,6 +942,13 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - /* Predictive Store Forwarding Disable */ - #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) - -+/* Processor ignores nested data breakpoints */ -+#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) -+/* LFENCE is always serializing */ -+#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) -+/* Null Selector Clears Base */ -+#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) -+ - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) - #define CPUID_XSAVE_XGETBV1 (1U << 2) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch b/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch deleted file mode 100644 index 9bb4bf9..0000000 --- a/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch +++ /dev/null @@ -1,152 +0,0 @@ -From a8180665019d537ee9775614627bf9eb8bd4770e Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:35:33 -0400 -Subject: [PATCH 5/7] target/i386: Add missing feature bits in EPYC-Milan model -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/7] 8f77315c8d7010564423df3e3c594c90fd5f9c00 (bdas1/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 27f03be6f59d04bd5673ba1e1628b2b490f9a9ff -Author: Babu Moger -Date: Thu May 4 15:53:10 2023 -0500 - - target/i386: Add missing feature bits in EPYC-Milan model - - Add the following feature bits for EPYC-Milan model and bump the version. - vaes : Vector VAES(ENC|DEC), VAES(ENC|DEC)LAST instruction support - vpclmulqdq : Vector VPCLMULQDQ instruction support - stibp-always-on : Single Thread Indirect Branch Prediction Mode has enhanced - performance and may be left Always on - amd-psfd : Predictive Store Forward Disable - no-nested-data-bp : Processor ignores nested data breakpoints - lfence-always-serializing : LFENCE instruction is always serializing - null-sel-clr-base : Null Selector Clears Base. When this bit is - set, a null segment load clears the segment base - - These new features will be added in EPYC-Milan-v2. The "-cpu help" output - after the change will be. - - x86 EPYC-Milan (alias configured by machine type) - x86 EPYC-Milan-v1 AMD EPYC-Milan Processor - x86 EPYC-Milan-v2 AMD EPYC-Milan Processor - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING - c. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-6-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 70 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 7ddebbaa3c..bbddc682df 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1923,6 +1923,56 @@ static const CPUCaches epyc_milan_cache_info = { - }, - }; - -+static const CPUCaches epyc_milan_v2_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4422,6 +4472,26 @@ static const X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x8000001E, - .model_id = "AMD EPYC-Milan 
Processor", - .cache_info = &epyc_milan_cache_info, -+ .versions = (X86CPUVersionDefinition[]) { -+ { .version = 1 }, -+ { -+ .version = 2, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-Milan-v2 Processor" }, -+ { "vaes", "on" }, -+ { "vpclmulqdq", "on" }, -+ { "stibp-always-on", "on" }, -+ { "amd-psfd", "on" }, -+ { "no-nested-data-bp", "on" }, -+ { "lfence-always-serializing", "on" }, -+ { "null-sel-clr-base", "on" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_milan_v2_cache_info -+ }, -+ { /* end of list */ } -+ } - }, - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch b/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch new file mode 100644 index 0000000..c72f290 --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch @@ -0,0 +1,215 @@ +From d9595fecd03c9a69ac562e3f240d50b2fa8d14a4 Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Wed, 20 Mar 2024 10:10:44 +0800 +Subject: [PATCH 006/100] target/i386: Add new CPU model SierraForest +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [6/91] 4bc71f82c258db46569a7e08965d1d358b19416c (bonzini/rhel-qemu-kvm) + +According to table 1-2 in Intel Architecture Instruction Set Extensions and +Future Features (rev 051) [1], SierraForest has the following new features +which have already been virtualized: + +- CMPCCXADD CPUID.(EAX=7,ECX=1):EAX[bit 7] +- AVX-IFMA CPUID.(EAX=7,ECX=1):EAX[bit 23] +- AVX-VNNI-INT8 CPUID.(EAX=7,ECX=1):EDX[bit 4] +- AVX-NE-CONVERT CPUID.(EAX=7,ECX=1):EDX[bit 5] + +Add above features to new CPU model SierraForest. 
Comparing with GraniteRapids +CPU model, SierraForest bare-metal removes the following features: + +- HLE CPUID.(EAX=7,ECX=0):EBX[bit 4] +- RTM CPUID.(EAX=7,ECX=0):EBX[bit 11] +- AVX512F CPUID.(EAX=7,ECX=0):EBX[bit 16] +- AVX512DQ CPUID.(EAX=7,ECX=0):EBX[bit 17] +- AVX512_IFMA CPUID.(EAX=7,ECX=0):EBX[bit 21] +- AVX512CD CPUID.(EAX=7,ECX=0):EBX[bit 28] +- AVX512BW CPUID.(EAX=7,ECX=0):EBX[bit 30] +- AVX512VL CPUID.(EAX=7,ECX=0):EBX[bit 31] +- AVX512_VBMI CPUID.(EAX=7,ECX=0):ECX[bit 1] +- AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 6] +- AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 11] +- AVX512_BITALG CPUID.(EAX=7,ECX=0):ECX[bit 12] +- AVX512_VPOPCNTDQ CPUID.(EAX=7,ECX=0):ECX[bit 14] +- LA57 CPUID.(EAX=7,ECX=0):ECX[bit 16] +- TSXLDTRK CPUID.(EAX=7,ECX=0):EDX[bit 16] +- AMX-BF16 CPUID.(EAX=7,ECX=0):EDX[bit 22] +- AVX512_FP16 CPUID.(EAX=7,ECX=0):EDX[bit 23] +- AMX-TILE CPUID.(EAX=7,ECX=0):EDX[bit 24] +- AMX-INT8 CPUID.(EAX=7,ECX=0):EDX[bit 25] +- AVX512_BF16 CPUID.(EAX=7,ECX=1):EAX[bit 5] +- fast zero-length MOVSB CPUID.(EAX=7,ECX=1):EAX[bit 10] +- fast short CMPSB, SCASB CPUID.(EAX=7,ECX=1):EAX[bit 12] +- AMX-FP16 CPUID.(EAX=7,ECX=1):EAX[bit 21] +- PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] +- XFD CPUID.(EAX=0xD,ECX=1):EAX[bit 4] +- EPT_PAGE_WALK_LENGTH_5 VMX_EPT_VPID_CAP(0x48c)[bit 7] + +Add all features of GraniteRapids CPU model except above features to +SierraForest CPU model. + +SierraForest doesn’t support TSX and RTM but supports TAA_NO. When RTM is +not enabled in host, KVM will not report TAA_NO. So, just don't include +TAA_NO in SierraForest CPU model. 
+ +[1] https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Reviewed-by: Zhao Liu +Reviewed-by: Xiaoyao Li +Signed-off-by: Tao Su +Message-ID: <20240320021044.508263-1-tao1.su@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6e82d3b6220777667968a04c87e1667f164ebe88) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 126 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0aa88d9b48..efbadc3ed7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4127,6 +4127,132 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .name = "SierraForest", ++ .level = 0x23, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 175, ++ .stepping = 0, ++ /* ++ * please keep the ascending order so that we can have a clear view of ++ * bit position of each feature. ++ */ ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | ++ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | ++ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_WBNOINVD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | 
CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | ++ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | ++ MSR_ARCH_CAP_PBRSB_NO, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | ++ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA, ++ .features[FEAT_7_1_EDX] = ++ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT, ++ .features[FEAT_7_2_EDX] = ++ CPUID_7_2_EDX_MCDT_NO, ++ .features[FEAT_VMX_BASIC] = ++ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = ++ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | ++ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | ++ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ 
MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | ++ MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = ++ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | ++ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | ++ VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = ++ VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | ++ VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | ++ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | ++ 
VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | ++ VMX_SECONDARY_EXEC_XSAVES, ++ .features[FEAT_VMX_VMFUNC] = ++ MSR_VMX_VMFUNC_EPT_SWITCHING, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (SierraForest)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { /* end of list */ }, ++ }, ++ }, + { + .name = "Denverton", + .level = 21, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch b/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch deleted file mode 100644 index 40c289a..0000000 --- a/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 92f0b5d0c7a841a21cabbc6efc1d7baf0e5a3e0f Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:26:12 -0400 -Subject: [PATCH 2/7] target/i386: Add new EPYC CPU versions with updated - cache_info - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/7] 71a2fd907636733f86729bc9328600f6f9306eaf (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit d7c72735f618a7ee27ee109d8b1468193734606a -Author: Michael Roth -Date: Thu May 4 15:53:07 2023 -0500 - - target/i386: Add new EPYC CPU versions with updated cache_info - - Introduce new EPYC cpu versions: EPYC-v4 and EPYC-Rome-v3. - The only difference vs. older models is an updated cache_info with - the 'complex_indexing' bit unset, since this bit is not currently - defined for AMD and may cause problems should it be used for - something else in the future. Setting this bit will also cause - CPUID validation failures when running SEV-SNP guests. - - Signed-off-by: Michael Roth - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Message-Id: <20230504205313.225073-3-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 118 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 3558c92ed0..8aa7eb611c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1707,6 +1707,56 @@ static const CPUCaches epyc_cache_info = { - }, - }; - -+static CPUCaches epyc_v4_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 64 * KiB, -+ .line_size = 64, -+ .associativity = 4, -+ .partitions = 1, -+ .sets = 256, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 8 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 8192, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - static const CPUCaches epyc_rome_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -1757,6 +1807,56 @@ static const CPUCaches epyc_rome_cache_info = { - }, - }; - -+static const CPUCaches epyc_rome_v3_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache 
= &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 16 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 16384, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - static const CPUCaches epyc_milan_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -4112,6 +4212,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .version = 4, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-v4 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_v4_cache_info -+ }, - { /* end of list */ } - } - }, -@@ -4231,6 +4340,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .version = 3, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-Rome-v3 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_rome_v3_cache_info -+ }, - { /* end of list */ } - } - }, --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch b/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch new file mode 100644 index 0000000..74de391 --- /dev/null +++ b/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch @@ -0,0 +1,50 @@ +From ae6229a3e45318b1101291b99a0e894399dcb1db Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Wed, 13 Mar 2024 07:53:23 -0700 +Subject: [PATCH 007/100] target/i386: Export RFDS bit to guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 
245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [7/91] 7eb6cae8821a2e953d3ff2033fa2e973011ad771 (bonzini/rhel-qemu-kvm) + +Register File Data Sampling (RFDS) is a CPU side-channel vulnerability +that may expose stale register value. CPUs that set RFDS_NO bit in MSR +IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS. +Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has +the microcode to help mitigate RFDS. + +Make RFDS_CLEAR and RFDS_NO bits available to guests. + +Signed-off-by: Pawan Gupta +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <9a38877857392b5c2deae7e7db1b170d15510314.1710341348.git.pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 41bdd9812863c150284a9339a048ed88c40f4df7) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index efbadc3ed7..489c853b42 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1158,8 +1158,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, +- "pbrsb-no", NULL, "gds-no", NULL, +- NULL, NULL, NULL, NULL, ++ "pbrsb-no", NULL, "gds-no", "rfds-no", ++ "rfds-clear", NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_ARCH_CAPABILITIES, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch b/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch new file mode 100644 index 0000000..f37dbc2 --- /dev/null +++ b/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch @@ -0,0 +1,192 @@ +From 4a811f54cdb3c9329f193ea43c76ed4eb1b14c19 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 19 Mar 2024 15:29:33 +0100 +Subject: [PATCH 022/100] target/i386: Implement 
mc->kvm_type() to get VM type + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [22/91] d58cf6ead2de37852adc15c7642166904403453f (bonzini/rhel-qemu-kvm) + +KVM is introducing a new API to create confidential guests, which +will be used by TDX and SEV-SNP but is also available for SEV and +SEV-ES. The API uses the VM type argument to KVM_CREATE_VM to +identify which confidential computing technology to use. + +Since there are no other expected uses of VM types, delegate +mc->kvm_type() for x86 boards to the confidential-guest-support +object pointed to by ms->cgs. + +For example, if a sev-guest object is specified to confidential-guest-support, +like, + + qemu -machine ...,confidential-guest-support=sev0 \ + -object sev-guest,id=sev0,... + +it will check if a VM type KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM +is supported, and if so use them together with the KVM_SEV_INIT2 +function of the KVM_MEMORY_ENCRYPT_OP ioctl. If not, it will fall back to +KVM_SEV_INIT and KVM_SEV_ES_INIT. + +This is a preparatory work towards TDX and SEV-SNP support, but it +will also enable support for VMSA features such as DebugSwap, which +are only available via KVM_SEV_INIT2. 
+ +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit ee88612df1e8d6c2bfec75bff3f9482ea44acec1) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 11 ++++++++ + target/i386/confidential-guest.h | 19 ++++++++++++++ + target/i386/kvm/kvm.c | 44 ++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 2 ++ + 4 files changed, 76 insertions(+) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 84a4801977..3d5b51e92d 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1381,6 +1381,16 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, + qapi_free_SgxEPCList(list); + } + ++static int x86_kvm_type(MachineState *ms, const char *vm_type) ++{ ++ /* ++ * No x86 machine has a kvm-type property. If one is added that has ++ * it, it should call kvm_get_vm_type() directly or not use it at all. ++ */ ++ assert(vm_type == NULL); ++ return kvm_enabled() ? kvm_get_vm_type(ms) : 0; ++} ++ + static void x86_machine_initfn(Object *obj) + { + X86MachineState *x86ms = X86_MACHINE(obj); +@@ -1405,6 +1415,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; + mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; + mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; ++ mc->kvm_type = x86_kvm_type; + x86mc->save_tsc_khz = true; + x86mc->fwcfg_dma_enabled = true; + nc->nmi_monitor_handler = x86_nmi; +diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h +index ca12d5a8fb..532e172a60 100644 +--- a/target/i386/confidential-guest.h ++++ b/target/i386/confidential-guest.h +@@ -36,5 +36,24 @@ struct X86ConfidentialGuest { + struct X86ConfidentialGuestClass { + /* */ + ConfidentialGuestSupportClass parent; ++ ++ /* */ ++ int (*kvm_type)(X86ConfidentialGuest *cg); + }; ++ ++/** ++ * x86_confidential_guest_kvm_type: ++ * ++ * Calls #X86ConfidentialGuestClass.unplug callback of @plug_handler. 
++ */ ++static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) ++{ ++ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); ++ ++ if (klass->kvm_type) { ++ return klass->kvm_type(cg); ++ } else { ++ return 0; ++ } ++} + #endif +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a12207a8ee..1f0ab12c2e 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "kvm_i386.h" ++#include "../confidential-guest.h" + #include "sev.h" + #include "xen-emu.h" + #include "hyperv.h" +@@ -161,6 +162,49 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; + static RateLimit bus_lock_ratelimit_ctrl; + static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + ++static const char *vm_type_name[] = { ++ [KVM_X86_DEFAULT_VM] = "default", ++}; ++ ++bool kvm_is_vm_type_supported(int type) ++{ ++ uint32_t machine_types; ++ ++ /* ++ * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM ++ * is always supported ++ */ ++ if (type == KVM_X86_DEFAULT_VM) { ++ return true; ++ } ++ ++ machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator), ++ KVM_CAP_VM_TYPES); ++ return !!(machine_types & BIT(type)); ++} ++ ++int kvm_get_vm_type(MachineState *ms) ++{ ++ int kvm_type = KVM_X86_DEFAULT_VM; ++ ++ if (ms->cgs) { ++ if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) { ++ error_report("configuration type %s not supported for x86 guests", ++ object_get_typename(OBJECT(ms->cgs))); ++ exit(1); ++ } ++ kvm_type = x86_confidential_guest_kvm_type( ++ X86_CONFIDENTIAL_GUEST(ms->cgs)); ++ } ++ ++ if (!kvm_is_vm_type_supported(kvm_type)) { ++ error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]); ++ exit(1); ++ } ++ ++ return kvm_type; ++} ++ + bool kvm_has_smm(void) + { + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +diff --git 
a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index 30fedcffea..6b44844d95 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -37,6 +37,7 @@ bool kvm_hv_vpindex_settable(void); + bool kvm_enable_sgx_provisioning(KVMState *s); + bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); + ++int kvm_get_vm_type(MachineState *ms); + void kvm_arch_reset_vcpu(X86CPU *cs); + void kvm_arch_after_reset_vcpu(X86CPU *cpu); + void kvm_arch_do_init_vcpu(X86CPU *cs); +@@ -49,6 +50,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); + + #ifdef CONFIG_KVM + ++bool kvm_is_vm_type_supported(int type); + bool kvm_has_adjust_clock_stable(void); + bool kvm_has_exception_payload(void); + void kvm_synchronize_all_tsc(void); +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch b/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch new file mode 100644 index 0000000..9844da7 --- /dev/null +++ b/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch @@ -0,0 +1,68 @@ +From fe60f8d47b6e14f17dd6c06b03bd00e6bcdbeefb Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 20 Mar 2024 17:31:38 +0800 +Subject: [PATCH 005/100] target/i386: Introduce Icelake-Server-v7 to enable + TSX + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [5/91] 66d865899e0d510b6c86763422d6b28b904b208a (bonzini/rhel-qemu-kvm) + +When start L2 guest with both L1/L2 using Icelake-Server-v3 or above, +QEMU reports below warning: + +"warning: host doesn't support requested feature: MSR(10AH).taa-no [bit 8]" + +Reason is QEMU Icelake-Server-v3 has TSX feature disabled but enables taa-no +bit. It's meaningless that TSX isn't supported but still claim TSX is secure. 
+So L1 KVM doesn't expose taa-no to L2 if TSX is unsupported, then starting L2 +triggers the warning. + +Fix it by introducing a new version Icelake-Server-v7 which has both TSX +and taa-no features. Then guest can use TSX securely when it see taa-no. + +This matches the production Icelake which supports TSX and isn't susceptible +to TSX Async Abort (TAA) vulnerabilities, a.k.a, taa-no. + +Ideally, TSX should have being enabled together with taa-no since v3, but for +compatibility, we'd better to add v7 to enable it. + +Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model") +Tested-by: Xiangfei Ma +Signed-off-by: Zhenzhong Duan +Message-ID: <20240320093138.80267-2-zhenzhong.duan@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c895fa54e3060c5ac6f3888dce96c9b78626072b) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index a7f71422ea..0aa88d9b48 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3840,6 +3840,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 7, ++ .note = "TSX, taa-no", ++ .props = (PropValue[]) { ++ /* Restore TSX features removed by -v2 above */ ++ { "hle", "on" }, ++ { "rtm", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch b/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch new file mode 100644 index 0000000..ace0367 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch @@ -0,0 +1,49 @@ +From 070dda07559a7488c62fc80a8c79e8baaee125eb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 3 Jul 2024 10:37:23 +0200 +Subject: [PATCH 087/100] target/i386: SEV: fix formatting of CPUID mismatch + message + +RH-Author: 
Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [87/91] 36bc2cc80d5ffc1ceeb1836540660ff45885a818 (bonzini/rhel-qemu-kvm) + +Fixes: 70943ad8e4d ("i386/sev: Add support for SNP CPUID validation", 2024-06-05) +Signed-off-by: Paolo Bonzini +(cherry picked from commit f45ef010e19fe86314bffd5d5c9d5d77f4ce8103) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c40562dce3..37de80adc7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -839,7 +839,7 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + size_t i; + + if (old->count != new->count) { +- error_report("SEV-SNP: CPUID validation failed due to count mismatch," ++ error_report("SEV-SNP: CPUID validation failed due to count mismatch, " + "provided: %d, expected: %d", old->count, new->count); + return; + } +@@ -851,8 +851,8 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + new_func = &new->entries[i]; + + if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) { +- error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" +- "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" ++ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x, " ++ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x, " + "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", + old_func->eax_in, old_func->ecx_in, + old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch b/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch new file mode 100644 index 0000000..3030d59 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch @@ -0,0 +1,42 @@ +From 
37b7e2185f1d23dd5f5a95b545b8d760492915ed Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 2 Aug 2024 01:43:37 +0200 +Subject: [PATCH 091/100] target/i386: SEV: fix mismatch in vcek-disabled + property name + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [91/91] 3a8abc4a0547b985cb79cef29bd3e8350d3d4b48 (bonzini/rhel-qemu-kvm) + +The vcek-disabled property of the sev-snp-guest object is misspelled +vcek-required (which I suppose would use the opposite polarity) in +the call to object_class_property_add_bool(). Fix it. + +Reported-by: Zixi Chen +Reviewed-by: Pankaj Gupta +Signed-off-by: Paolo Bonzini +(cherry picked from commit d4392415c328f83b2e30517a3561be523874f441) +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b921defb63..aed565dbe8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2378,7 +2378,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + object_class_property_add_bool(oc, "author-key-enabled", + sev_snp_guest_get_author_key_enabled, + sev_snp_guest_set_author_key_enabled); +- object_class_property_add_bool(oc, "vcek-required", ++ object_class_property_add_bool(oc, "vcek-disabled", + sev_snp_guest_get_vcek_disabled, + sev_snp_guest_set_vcek_disabled); + object_class_property_add_str(oc, "host-data", +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch b/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch new file mode 100644 index 0000000..7c17e53 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch @@ -0,0 +1,146 @@ +From 6bb738fb90a3a1221ae35596b3d03a17e0b1c34d Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 19 Mar 2024 15:30:25 +0100 +Subject: [PATCH 023/100] target/i386: SEV: use KVM_SEV_INIT2 if possible + 
+RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [23/91] 9579d772ae5124a94c6b1e3a4566bf3470d2bc8f (bonzini/rhel-qemu-kvm) + +Implement support for the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM virtual +machine types, and the KVM_SEV_INIT2 function of KVM_MEMORY_ENCRYPT_OP. + +These replace the KVM_SEV_INIT and KVM_SEV_ES_INIT functions, and have +several advantages: + +- sharing the initialization sequence with SEV-SNP and TDX + +- allowing arguments including the set of desired VMSA features + +- protection against invalid use of KVM_GET/SET_* ioctls for guests + with encrypted state + +If the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM types are not supported, +fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT (which use the +default x86 VM type). + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 663e2f443e5722370708ce2f4c27d94a2087d2d3) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 2 ++ + target/i386/sev.c | 41 +++++++++++++++++++++++++++++++++++++---- + 2 files changed, 39 insertions(+), 4 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 1f0ab12c2e..408568d053 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -164,6 +164,8 @@ static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + + static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", ++ [KVM_X86_SEV_VM] = "SEV", ++ [KVM_X86_SEV_ES_VM] = "SEV-ES", + }; + + bool kvm_is_vm_type_supported(int type) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index ebe36d4c10..9dab4060b8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -26,6 +26,7 @@ + #include "qemu/error-report.h" + #include "crypto/hash.h" + #include "sysemu/kvm.h" ++#include "kvm/kvm_i386.h" + #include "sev.h" + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" +@@ -56,6 +57,8 @@ 
OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + struct SevGuestState { + X86ConfidentialGuest parent_obj; + ++ int kvm_type; ++ + /* configuration parameters */ + char *sev_device; + uint32_t policy; +@@ -850,6 +853,26 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++static int sev_kvm_type(X86ConfidentialGuest *cg) ++{ ++ SevGuestState *sev = SEV_GUEST(cg); ++ int kvm_type; ++ ++ if (sev->kvm_type != -1) { ++ goto out; ++ } ++ ++ kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; ++ if (kvm_is_vm_type_supported(kvm_type)) { ++ sev->kvm_type = kvm_type; ++ } else { ++ sev->kvm_type = KVM_X86_DEFAULT_VM; ++ } ++ ++out: ++ return sev->kvm_type; ++} ++ + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevGuestState *sev = SEV_GUEST(cgs); +@@ -929,13 +952,19 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + __func__); + goto err; + } +- cmd = KVM_SEV_ES_INIT; +- } else { +- cmd = KVM_SEV_INIT; + } + + trace_kvm_sev_init(); +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { ++ cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; ++ ++ ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ } else { ++ struct kvm_sev_init args = { 0 }; ++ ++ ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ } ++ + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +@@ -1327,8 +1356,10 @@ static void + sev_guest_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_kvm_init; ++ x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_guest_get_sev_device, +@@ -1357,6 +1388,8 @@ sev_guest_instance_init(Object *obj) + { + SevGuestState *sev = SEV_GUEST(obj); + ++ sev->kvm_type = -1; ++ + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + sev->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "policy", &sev->policy, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch b/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch new file mode 100644 index 0000000..cd41279 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch @@ -0,0 +1,124 @@ +From 090c64ea622534ff2ae6c9b66cdf0b1ddb58bf26 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:36 +0100 +Subject: [PATCH 002/100] target/i386: add guest-phys-bits cpu property + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [2/91] 6603e842012dc484e1f571ea0a77b59095f37003 (bonzini/rhel-qemu-kvm) + +Allows to set guest-phys-bits (cpuid leaf 80000008, eax[23:16]) +via -cpu $model,guest-phys-bits=$nr. 
+ +Signed-off-by: Gerd Hoffmann +Message-ID: <20240318155336.156197-3-kraxel@redhat.com> +Reviewed-by: Zhao Liu +Signed-off-by: Paolo Bonzini +(cherry picked from commit 513ba32dccc659c80722b3a43233b26eaa50309a) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 2 ++ + target/i386/cpu.c | 22 ++++++++++++++++++++++ + target/i386/cpu.h | 8 ++++++++ + 3 files changed, 32 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 648762d908..b9fde3cec1 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -349,6 +349,8 @@ GlobalProperty pc_rhel_compat[] = { + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + + GlobalProperty pc_rhel_9_5_compat[] = { ++ /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ ++ { TYPE_X86_CPU, "guest-phys-bits", "0" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index be7b0663cd..a7f71422ea 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6591,6 +6591,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + /* 64 bit processor */ + *eax |= (cpu_x86_virtual_addr_width(env) << 8); ++ *eax |= (cpu->guest_phys_bits << 16); + } + *ebx = env->features[FEAT_8000_0008_EBX]; + if (cs->nr_cores * cs->nr_threads > 1) { +@@ -7350,6 +7351,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + goto out; + } + ++ if (cpu->guest_phys_bits == -1) { ++ /* ++ * If it was not set by the user, or by the accelerator via ++ * cpu_exec_realizefn, clear. ++ */ ++ cpu->guest_phys_bits = 0; ++ } ++ + if (cpu->ucode_rev == 0) { + /* + * The default is the same as KVM's. 
Note that this check +@@ -7400,6 +7409,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + if (cpu->phys_bits == 0) { + cpu->phys_bits = TCG_PHYS_ADDR_BITS; + } ++ if (cpu->guest_phys_bits && ++ (cpu->guest_phys_bits > cpu->phys_bits || ++ cpu->guest_phys_bits < 32)) { ++ error_setg(errp, "guest-phys-bits should be between 32 and %u " ++ " (but is %u)", ++ cpu->phys_bits, cpu->guest_phys_bits); ++ return; ++ } + } else { + /* For 32 bit systems don't use the user set value, but keep + * phys_bits consistent with what we tell the guest. +@@ -7408,6 +7425,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + error_setg(errp, "phys-bits is not user-configurable in 32 bit"); + return; + } ++ if (cpu->guest_phys_bits != 0) { ++ error_setg(errp, "guest-phys-bits is not user-configurable in 32 bit"); ++ return; ++ } + + if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) { + cpu->phys_bits = 36; +@@ -7908,6 +7929,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false), + DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), ++ DEFINE_PROP_UINT32("guest-phys-bits", X86CPU, guest_phys_bits, -1), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), + DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 6b05738079..6112e27bfd 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -2027,6 +2027,14 @@ struct ArchCPU { + /* Number of physical address bits supported */ + uint32_t phys_bits; + ++ /* ++ * Number of guest physical address bits available. Usually this is ++ * identical to host physical address bits. 
With NPT or EPT 4-level ++ * paging, guest physical address space might be restricted to 48 bits ++ * even if the host cpu supports more physical address bits. ++ */ ++ uint32_t guest_phys_bits; ++ + /* in order to simplify APIC support, we leave this pointer to the + user */ + struct DeviceState *apic_state; +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch deleted file mode 100644 index 2b1cbc9..0000000 --- a/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 0d056d6da9e4147d5965bf3507f6d6d6a413924d Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Wed, 24 May 2023 06:52:43 -0400 -Subject: [PATCH 2/5] target/i386: add support for FB_CLEAR feature - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature -RH-Bugzilla: 2216201 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/2] 5f191964ba25754107a06ef907f4ac614280aaa1 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216201 - -commit 22e1094ca82d5518c1b69aff3e87c550776ae1eb -Author: Emanuele Giuseppe Esposito -Date: Wed Feb 1 08:57:59 2023 -0500 - - target/i386: add support for FB_CLEAR feature - - As reported by the Intel's doc: - "FB_CLEAR: The processor will overwrite fill buffer values as part of - MD_CLEAR operations with the VERW instruction. - On these processors, L1D_FLUSH does not overwrite fill buffer values." - - If this cpu feature is present in host, allow QEMU to choose whether to - show it to the guest too. - One disadvantage of not exposing it is that the guest will report - a non existing vulnerability in - /sys/devices/system/cpu/vulnerabilities/mmio_stale_data - because the mitigation is present only when the cpu has - (FLUSH_L1D and MD_CLEAR) or FB_CLEAR - features enabled. 
- - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20230201135759.555607-3-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index caf6338cc0..839706b430 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1012,7 +1012,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", - "taa-no", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ NULL, "fb-clear", NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 74fa649b60..c28b9df217 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -989,6 +989,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) - #define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) - #define MSR_ARCH_CAP_TAA_NO (1U << 8) -+#define MSR_ARCH_CAP_FB_CLEAR (1U << 17) - - #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) - --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch deleted file mode 100644 index 39f2542..0000000 --- a/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 14eae569030805680570d93412100ad26242c7e6 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Wed, 24 May 2023 06:52:34 -0400 -Subject: [PATCH 1/5] target/i386: add support for FLUSH_L1D feature - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature -RH-Bugzilla: 2216201 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/2] e296c75c5cd7e1d16d3c70483d52aeba9f9eb2cd (eesposit/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2216201 - -commit 0e7e3bf1a552c178924867fa7c2f30ccc8a179e0 -Author: Emanuele Giuseppe Esposito -Date: Wed Feb 1 08:57:58 2023 -0500 - - target/i386: add support for FLUSH_L1D feature - - As reported by Intel's doc: - "L1D_FLUSH: Writeback and invalidate the L1 data cache" - - If this cpu feature is present in host, allow QEMU to choose whether to - show it to the guest too. - One disadvantage of not exposing it is that the guest will report - a non existing vulnerability in - /sys/devices/system/cpu/vulnerabilities/mmio_stale_data - because the mitigation is present only when the cpu has - (FLUSH_L1D and MD_CLEAR) or FB_CLEAR - features enabled. - - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20230201135759.555607-2-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 2 ++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 0ef2bf1b93..caf6338cc0 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -860,7 +860,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "tsx-ldtrk", NULL, NULL /* pconfig */, "arch-lbr", - NULL, NULL, "amx-bf16", "avx512-fp16", - "amx-tile", "amx-int8", "spec-ctrl", "stibp", -- NULL, "arch-capabilities", "core-capability", "ssbd", -+ "flush-l1d", "arch-capabilities", "core-capability", "ssbd", - }, - .cpuid = { - .eax = 7, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index d243e290d3..74fa649b60 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -896,6 +896,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) - /* Single Thread Indirect Branch Predictors */ - #define CPUID_7_0_EDX_STIBP (1U << 27) -+/* Flush L1D cache */ -+#define CPUID_7_0_EDX_FLUSH_L1D (1U << 28) - /* Arch Capabilities */ - #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) 
- /* Core Capability */ --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch b/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch deleted file mode 100644 index 2c81c72..0000000 --- a/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 457e74c076e0fe7b64631dfd4369d167f0762c9a Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:22:41 -0400 -Subject: [PATCH 1/7] target/i386: allow versioned CPUs to specify new - cache_info - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/7] 6070e07a4bb070d1c15a811b2bd3195929c18d61 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit cca0a000d06f897411a8af4402e5d0522bbe450b -Author: Michael Roth -Date: Thu May 4 15:53:06 2023 -0500 - - target/i386: allow versioned CPUs to specify new cache_info - - New EPYC CPUs versions require small changes to their cache_info's. - Because current QEMU x86 CPU definition does not support versioned - cach_info, we would have to declare a new CPU type for each such case. - To avoid the dup work, add "cache_info" in X86CPUVersionDefinition", - to allow new cache_info pointers to be specified for a new CPU version. - - Co-developed-by: Wei Huang - Signed-off-by: Wei Huang - Signed-off-by: Michael Roth - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Message-Id: <20230504205313.225073-2-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 35 ++++++++++++++++++++++++++++++++--- - 1 file changed, 32 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 4ac3046313..3558c92ed0 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1598,6 +1598,7 @@ typedef struct X86CPUVersionDefinition { - const char *alias; - const char *note; - PropValue *props; -+ const CPUCaches *const cache_info; - } X86CPUVersionDefinition; - - /* Base definition for a CPU model */ -@@ -5213,6 +5214,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) - assert(vdef->version == version); - } - -+static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, -+ X86CPUModel *model) -+{ -+ const X86CPUVersionDefinition *vdef; -+ X86CPUVersion version = x86_cpu_model_resolve_version(model); -+ const CPUCaches *cache_info = model->cpudef->cache_info; -+ -+ if (version == CPU_VERSION_LEGACY) { -+ return cache_info; -+ } -+ -+ for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { -+ if (vdef->cache_info) { -+ cache_info = vdef->cache_info; -+ } -+ -+ if (vdef->version == version) { -+ break; -+ } -+ } -+ -+ assert(vdef->version == version); -+ return cache_info; -+} -+ - /* - * Load data from X86CPUDefinition into a X86CPU object. - * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
-@@ -5245,7 +5271,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) - } - - /* legacy-cache defaults to 'off' if CPU model provides cache info */ -- cpu->legacy_cache = !def->cache_info; -+ cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); - - env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; - -@@ -6724,14 +6750,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - - /* Cache information initialization */ - if (!cpu->legacy_cache) { -- if (!xcc->model || !xcc->model->cpudef->cache_info) { -+ const CPUCaches *cache_info = -+ x86_cpu_get_versioned_cache_info(cpu, xcc->model); -+ -+ if (!xcc->model || !cache_info) { - g_autofree char *name = x86_cpu_class_get_model_name(xcc); - error_setg(errp, - "CPU model '%s' doesn't support legacy-cache=off", name); - return; - } - env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = -- *xcc->model->cpudef->cache_info; -+ *cache_info; - } else { - /* Build legacy cache information */ - env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch b/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch new file mode 100644 index 0000000..dec3220 --- /dev/null +++ b/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch @@ -0,0 +1,161 @@ +From 0573fcd1775b6613127b1906d59d02e65f7519f3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 11:07:43 -0400 +Subject: [PATCH 021/100] target/i386: introduce x86-confidential-guest + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [21/91] e86d3bcde7e1c2fa1ba8c9bc83e02033644f1ac0 (bonzini/rhel-qemu-kvm) + +Introduce a common superclass for x86 confidential guest implementations. 
+It will extend ConfidentialGuestSupportClass with a method that provides +the VM type to be passed to KVM_CREATE_VM. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit d82e9c843d662f13821026618aba936eda31a6c0) +Signed-off-by: Paolo Bonzini +--- + target/i386/confidential-guest.c | 33 ++++++++++++++++++++++++++ + target/i386/confidential-guest.h | 40 ++++++++++++++++++++++++++++++++ + target/i386/meson.build | 2 +- + target/i386/sev.c | 6 ++--- + 4 files changed, 77 insertions(+), 4 deletions(-) + create mode 100644 target/i386/confidential-guest.c + create mode 100644 target/i386/confidential-guest.h + +diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c +new file mode 100644 +index 0000000000..b3727845ad +--- /dev/null ++++ b/target/i386/confidential-guest.c +@@ -0,0 +1,33 @@ ++/* ++ * QEMU Confidential Guest support ++ * ++ * Copyright (C) 2024 Red Hat, Inc. ++ * ++ * Authors: ++ * Paolo Bonzini ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or ++ * later. See the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "confidential-guest.h" ++ ++OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, ++ x86_confidential_guest, ++ X86_CONFIDENTIAL_GUEST, ++ CONFIDENTIAL_GUEST_SUPPORT) ++ ++static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) ++{ ++} ++ ++static void x86_confidential_guest_init(Object *obj) ++{ ++} ++ ++static void x86_confidential_guest_finalize(Object *obj) ++{ ++} +diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h +new file mode 100644 +index 0000000000..ca12d5a8fb +--- /dev/null ++++ b/target/i386/confidential-guest.h +@@ -0,0 +1,40 @@ ++/* ++ * x86-specific confidential guest methods. ++ * ++ * Copyright (c) 2024 Red Hat Inc. ++ * ++ * Authors: ++ * Paolo Bonzini ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. 
++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef TARGET_I386_CG_H ++#define TARGET_I386_CG_H ++ ++#include "qom/object.h" ++ ++#include "exec/confidential-guest-support.h" ++ ++#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest" ++ ++OBJECT_DECLARE_TYPE(X86ConfidentialGuest, ++ X86ConfidentialGuestClass, ++ X86_CONFIDENTIAL_GUEST) ++ ++struct X86ConfidentialGuest { ++ /* */ ++ ConfidentialGuestSupport parent_obj; ++}; ++ ++/** ++ * X86ConfidentialGuestClass: ++ * ++ * Class to be implemented by confidential-guest-support concrete objects ++ * for the x86 target. ++ */ ++struct X86ConfidentialGuestClass { ++ /* */ ++ ConfidentialGuestSupportClass parent; ++}; ++#endif +diff --git a/target/i386/meson.build b/target/i386/meson.build +index 7c74bfa859..8abce725f8 100644 +--- a/target/i386/meson.build ++++ b/target/i386/meson.build +@@ -6,7 +6,7 @@ i386_ss.add(files( + 'xsave_helper.c', + 'cpu-dump.c', + )) +-i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) ++i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c')) + + # x86 cpu type + i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c49a8fd55e..ebe36d4c10 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -35,7 +35,7 @@ + #include "monitor/monitor.h" + #include "monitor/hmp-target.h" + #include "qapi/qapi-commands-misc-target.h" +-#include "exec/confidential-guest-support.h" ++#include "confidential-guest.h" + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" + +@@ -54,7 +54,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + * -machine ...,memory-encryption=sev0 + */ + struct SevGuestState { +- ConfidentialGuestSupport parent_obj; ++ X86ConfidentialGuest parent_obj; + + /* configuration parameters */ + char *sev_device; +@@ -1372,7 +1372,7 @@ sev_guest_instance_init(Object *obj) + + /* sev guest info */ + static const TypeInfo sev_guest_info = { +- 
.parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .parent = TYPE_X86_CONFIDENTIAL_GUEST, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_finalize = sev_guest_finalize, +-- +2.39.3 + diff --git a/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch b/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch new file mode 100644 index 0000000..d21d298 --- /dev/null +++ b/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch @@ -0,0 +1,59 @@ +From b02dc1e5c0f01228053e784f9ec7ac3a47e91d7c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:36:25 -0500 +Subject: [PATCH 026/100] trace/kvm: Split address space and slot id in + trace_kvm_set_user_memory() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [26/91] 640511c4ab0ba76bb4483f6c3fb73e060d914f0a (bonzini/rhel-qemu-kvm) + +The upper 16 bits of kvm_userspace_memory_region::slot are +address space id. Parse it separately in trace_kvm_set_user_memory(). 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-5-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 72853afc638b3e28779c86dd05da2f3bb149fe2c) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 5 +++-- + accel/kvm/trace-events | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index b51e09a583..9bd235c969 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -303,8 +303,9 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + slot->old_flags = mem.flags; + err: +- trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, +- mem.memory_size, mem.userspace_addr, ret); ++ trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, ++ mem.guest_phys_addr, mem.memory_size, ++ mem.userspace_addr, ret); + if (ret < 0) { + error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index a25902597b..9f599abc17 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" + kvm_irqchip_release_virq(int virq) "virq %d" + kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" + kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" +-kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" ++kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t 
guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" + kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 + kvm_resample_fd_notify(int gsi) "gsi %d" + kvm_dirty_ring_full(int id) "vcpu %d" +-- +2.39.3 + diff --git a/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch b/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch deleted file mode 100644 index ef99b30..0000000 --- a/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch +++ /dev/null @@ -1,88 +0,0 @@ -From b998f8474846886fa1e0428fe79fe2a79231cc05 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 12 May 2023 15:43:38 +0100 -Subject: [PATCH 35/37] ui: Fix pixel colour channel order for PNG screenshots -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -RH-MergeRequest: 183: ui: Fix pixel colour channel order for PNG screenshots -RH-Bugzilla: 2222579 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 76acd3c5526639e70bc2998f584503c78fc9bc56 (marcandre.lureau-rh/qemu-kvm-centos) - -When we take a PNG screenshot the ordering of the colour channels in -the data is not correct, resulting in the image having weird -colouring compared to the actual display. (Specifically, on a -little-endian host the blue and red channels are swapped; on -big-endian everything is wrong.) - -This happens because the pixman idea of the pixel data and the libpng -idea differ. PIXMAN_a8r8g8b8 defines that pixels are 32-bit values, -with A in bits 24-31, R in bits 16-23, G in bits 8-15 and B in bits -0-7. 
This means that on little-endian systems the bytes in memory -are - B G R A -and on big-endian systems they are - A R G B - -libpng, on the other hand, thinks of pixels as being a series of -values for each channel, so its format PNG_COLOR_TYPE_RGB_ALPHA -always wants bytes in the order - R G B A - -This isn't the same as the pixman order for either big or little -endian hosts. - -The alpha channel is also unnecessary bulk in the output PNG file, -because there is no alpha information in a screenshot. - -To handle the endianness issue, we already define in ui/qemu-pixman.h -various PIXMAN_BE_* and PIXMAN_LE_* values that give consistent -byte-order pixel channel formats. So we can use PIXMAN_BE_r8g8b8 and -PNG_COLOR_TYPE_RGB, which both have an in-memory byte order of - R G B -and 3 bytes per pixel. - -(PPM format screenshots get this right; they already use the -PIXMAN_BE_r8g8b8 format.) - -Cc: qemu-stable@nongnu.org -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1622 -Fixes: 9a0a119a382867 ("Added parameter to take screenshot with screendump as PNG") -Signed-off-by: Peter Maydell -Reviewed-by: Marc-André Lureau -Message-id: 20230502135548.2451309-1-peter.maydell@linaro.org - -(cherry picked from commit cd22a0f520f471e3bd33bc19cf3b2fa772cdb2a8) -Signed-off-by: Marc-André Lureau ---- - ui/console.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/ui/console.c b/ui/console.c -index 6e8a3cdc62..e173731e20 100644 ---- a/ui/console.c -+++ b/ui/console.c -@@ -311,7 +311,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) - png_struct *png_ptr; - png_info *info_ptr; - g_autoptr(pixman_image_t) linebuf = -- qemu_pixman_linebuf_create(PIXMAN_a8r8g8b8, width); -+ qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width); - uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf); - FILE *f = fdopen(fd, "wb"); - int y; -@@ -341,7 +341,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) - png_init_io(png_ptr, 
f); - - png_set_IHDR(png_ptr, info_ptr, width, height, 8, -- PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, -+ PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, - PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); - - png_write_info(png_ptr, info_ptr); --- -2.39.3 - diff --git a/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch b/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch new file mode 100644 index 0000000..f141bf1 --- /dev/null +++ b/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch @@ -0,0 +1,62 @@ +From e185104a10a37174d13d981fa1febafbb7e651aa Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 13:49:49 +0200 +Subject: [PATCH 050/100] update-linux-headers: fix forwarding to asm-generic + headers + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [50/91] 3c98a7fe790d943bb5ff8dca1da83f5944ec3e2e (bonzini/rhel-qemu-kvm) + +Afer commit 3efc75ad9d9 ("scripts/update-linux-headers.sh: Remove +temporary directory inbetween", 2024-05-29), updating linux-headers/ +results in errors such as + + cp: cannot stat '/tmp/tmp.1A1Eejh1UE/headers/include/asm/bitsperlong.h': No such file or directory + +because Loongarch does not have an asm/bitsperlong.h file and uses the +generic version. Before commit 3efc75ad9d9, the missing file would +incorrectly cause stale files to be included in linux-headers/. The files +were never committed to qemu.git, but were wrong nevertheless. The build +would just use the system version of the files, which is opposite to +the idea of importing Linux header files into QEMU's tree. + +Create forwarding headers, resembling the ones that are generated during a +kernel build by scripts/Makefile.asm-generic, if a file is only installed +under include/asm-generic/. 
+ +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit ef7c70f020ca1fe9e7c98ea2cd9d6ba3c5714716) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index f084bee72e..78c0f2c43e 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -119,7 +119,14 @@ for arch in $ARCHLIST; do + rm -rf "$output/linux-headers/asm-$arch" + mkdir -p "$output/linux-headers/asm-$arch" + for header in kvm.h unistd.h bitsperlong.h mman.h; do +- cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ if test -f "$hdrdir/include/asm/$header"; then ++ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ elif test -f "$hdrdir/include/asm-generic/$header"; then ++ # not installed as , but used as such in kernel sources ++ cat <$output/linux-headers/asm-$arch/$header ++#include ++EOF ++ fi + done + + if [ $arch = mips ]; then +-- +2.39.3 + diff --git a/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch b/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch new file mode 100644 index 0000000..a75a2aa --- /dev/null +++ b/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch @@ -0,0 +1,175 @@ +From 8d6c37ddc253f63202cc9519670c258e9d81b98e Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 14:25:06 +0200 +Subject: [PATCH 053/100] update-linux-headers: import linux/kvm_para.h header + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [53/91] 27d4db0ecec7d0b8adeba1ec85fca32eacee1009 (bonzini/rhel-qemu-kvm) + +Right now QEMU is importing arch/x86/include/uapi/asm/kvm_para.h +because it includes definitions for kvmclock and for KVM CPUID +bits. 
However, other definitions for KVM hypercall values and return +codes are included in include/uapi/linux/kvm_para.h and they will be +used by SEV-SNP. + +To ensure that it is possible to include both and +"standard-headers/asm-x86/kvm_para.h" without conflicts, provide +linux/kvm_para.h as a portable header too, and forward linux-headers/ +files to those in include/standard-headers. Note that +will include architecture-specific definitions as well, but +"standard-headers/linux/kvm_para.h" will not because it can be used in +architecture-independent files. + +This could easily be extended to other architectures, but right now +they do not need any symbol in their specific kvm_para.h files. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit aa274c33c39e7de981dc195abe60e1a246c9d248) +Signed-off-by: Paolo Bonzini +--- + include/standard-headers/linux/kvm_para.h | 38 +++++++++++++++++++++++ + linux-headers/asm-x86/kvm_para.h | 1 + + linux-headers/linux/kvm_para.h | 2 ++ + scripts/update-linux-headers.sh | 22 ++++++++++++- + 4 files changed, 62 insertions(+), 1 deletion(-) + create mode 100644 include/standard-headers/linux/kvm_para.h + create mode 100644 linux-headers/asm-x86/kvm_para.h + create mode 100644 linux-headers/linux/kvm_para.h + +diff --git a/include/standard-headers/linux/kvm_para.h b/include/standard-headers/linux/kvm_para.h +new file mode 100644 +index 0000000000..015c166302 +--- /dev/null ++++ b/include/standard-headers/linux/kvm_para.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef __LINUX_KVM_PARA_H ++#define __LINUX_KVM_PARA_H ++ ++/* ++ * This header file provides a method for making a hypercall to the host ++ * Architectures should define: ++ * - kvm_hypercall0, kvm_hypercall1... 
++ * - kvm_arch_para_features ++ * - kvm_para_available ++ */ ++ ++/* Return values for hypercalls */ ++#define KVM_ENOSYS 1000 ++#define KVM_EFAULT EFAULT ++#define KVM_EINVAL EINVAL ++#define KVM_E2BIG E2BIG ++#define KVM_EPERM EPERM ++#define KVM_EOPNOTSUPP 95 ++ ++#define KVM_HC_VAPIC_POLL_IRQ 1 ++#define KVM_HC_MMU_OP 2 ++#define KVM_HC_FEATURES 3 ++#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 ++#define KVM_HC_KICK_CPU 5 ++#define KVM_HC_MIPS_GET_CLOCK_FREQ 6 ++#define KVM_HC_MIPS_EXIT_VM 7 ++#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 ++#define KVM_HC_CLOCK_PAIRING 9 ++#define KVM_HC_SEND_IPI 10 ++#define KVM_HC_SCHED_YIELD 11 ++#define KVM_HC_MAP_GPA_RANGE 12 ++ ++/* ++ * hypercalls use architecture specific ++ */ ++ ++#endif /* __LINUX_KVM_PARA_H */ +diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h +new file mode 100644 +index 0000000000..1d3e0e0b07 +--- /dev/null ++++ b/linux-headers/asm-x86/kvm_para.h +@@ -0,0 +1 @@ ++#include "standard-headers/asm-x86/kvm_para.h" +diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h +new file mode 100644 +index 0000000000..6a1e672259 +--- /dev/null ++++ b/linux-headers/linux/kvm_para.h +@@ -0,0 +1,2 @@ ++#include "standard-headers/linux/kvm_para.h" ++#include +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 90759dcfe0..64d1989961 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -64,6 +64,7 @@ cp_portable() { + -e 'linux/kernel' \ + -e 'linux/sysinfo' \ + -e 'asm/setup_data.h' \ ++ -e 'asm/kvm_para.h' \ + > /dev/null + then + echo "Unexpected #include in input file $f". 
+@@ -71,6 +72,15 @@ cp_portable() { + fi + + header=$(basename "$f"); ++ ++ if test -z "$arch"; then ++ # Let users of include/standard-headers/linux/ headers pick the ++ # asm-* header that they care about ++ arch_cmd='/]*\)>/d' ++ else ++ arch_cmd='s/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' ++ fi ++ + sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \ + -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \ +@@ -79,7 +89,7 @@ cp_portable() { + -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ + -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ +- -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ ++ -e "$arch_cmd" \ + -e 's/__bitwise//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ +@@ -159,7 +169,12 @@ EOF + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" ++ + cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" ++ cat <$output/linux-headers/asm-$arch/kvm_para.h ++#include "standard-headers/asm-$arch/kvm_para.h" ++EOF ++ + # Remove everything except the macros from bootparam.h avoiding the + # unnecessary import of several video/ist/etc headers + sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ +@@ -209,6 +224,10 @@ if [ -d "$linux/LICENSES" ]; then + done + fi + ++cat <$output/linux-headers/linux/kvm_para.h ++#include "standard-headers/linux/kvm_para.h" ++#include ++EOF + cat <$output/linux-headers/linux/virtio_config.h + #include "standard-headers/linux/virtio_config.h" + EOF +@@ -231,6 +250,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/ethtool.h" \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ ++ "$hdrdir/include/linux/kvm_para.h" \ + "$hdrdir/include/linux/vhost_types.h" \ + 
"$hdrdir/include/linux/sysinfo.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" +-- +2.39.3 + diff --git a/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch b/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch new file mode 100644 index 0000000..cb0a4d4 --- /dev/null +++ b/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch @@ -0,0 +1,95 @@ +From 00e250d9df1949d363758a34e3f46d8c71be054f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 14:16:55 +0200 +Subject: [PATCH 051/100] update-linux-headers: move pvpanic.h to correct + directory + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [51/91] 10efff5bbcb867ba34f3f9ff8045381ea96f94c7 (bonzini/rhel-qemu-kvm) + +Linux has , not . Use the same +directory for QEMU's include/standard-headers/ copy. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit b8116f4cbaa0f64bb07564f20b3b5219e23c8bff) +Signed-off-by: Paolo Bonzini +--- + hw/misc/pvpanic-isa.c | 2 +- + hw/misc/pvpanic-pci.c | 2 +- + hw/misc/pvpanic.c | 2 +- + include/standard-headers/{linux => misc}/pvpanic.h | 0 + scripts/update-linux-headers.sh | 6 ++++-- + 5 files changed, 7 insertions(+), 5 deletions(-) + rename include/standard-headers/{linux => misc}/pvpanic.h (100%) + +diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c +index ccec50f61b..b4f84c4110 100644 +--- a/hw/misc/pvpanic-isa.c ++++ b/hw/misc/pvpanic-isa.c +@@ -21,7 +21,7 @@ + #include "hw/misc/pvpanic.h" + #include "qom/object.h" + #include "hw/isa/isa.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + #include "hw/acpi/acpi_aml_interface.h" + + OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE) +diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c +index 
83be95d0d2..4d44a881da 100644 +--- a/hw/misc/pvpanic-pci.c ++++ b/hw/misc/pvpanic-pci.c +@@ -21,7 +21,7 @@ + #include "hw/misc/pvpanic.h" + #include "qom/object.h" + #include "hw/pci/pci_device.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + + OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE) + +diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c +index 1540e9091a..80289ecf5f 100644 +--- a/hw/misc/pvpanic.c ++++ b/hw/misc/pvpanic.c +@@ -21,7 +21,7 @@ + #include "hw/qdev-properties.h" + #include "hw/misc/pvpanic.h" + #include "qom/object.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + + static void handle_event(int event) + { +diff --git a/include/standard-headers/linux/pvpanic.h b/include/standard-headers/misc/pvpanic.h +similarity index 100% +rename from include/standard-headers/linux/pvpanic.h +rename to include/standard-headers/misc/pvpanic.h +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 78c0f2c43e..90759dcfe0 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -232,10 +232,12 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ + "$hdrdir/include/linux/vhost_types.h" \ +- "$hdrdir/include/linux/sysinfo.h" \ +- "$hdrdir/include/misc/pvpanic.h"; do ++ "$hdrdir/include/linux/sysinfo.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" + done ++mkdir -p "$output/include/standard-headers/misc" ++cp_portable "$hdrdir/include/misc/pvpanic.h" \ ++ "$output/include/standard-headers/misc" + mkdir -p "$output/include/standard-headers/drm" + cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ + "$output/include/standard-headers/drm" +-- +2.39.3 + diff --git a/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch b/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch deleted 
file mode 100644 index 8c468d8..0000000 --- a/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch +++ /dev/null @@ -1,180 +0,0 @@ -From c1502b0cd16378d6d5bd4259b90bf81a5fb5aad3 Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Fri, 5 May 2023 14:00:51 +0200 -Subject: [PATCH 20/21] util/async-teardown: wire up query-command-line-options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests -RH-Bugzilla: 2168500 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cédric Le Goater -RH-Commit: [1/2] 76e5f25df2c02721f5a29f552ee3061be589abb2 (thuth/qemu-kvm-cs9) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 - -Add new -run-with option with an async-teardown=on|off parameter. It is -visible in the output of query-command-line-options QMP command, so it -can be discovered and used by libvirt. - -The option -async-teardown is now redundant, deprecate it. 
- -Reported-by: Boris Fiuczynski -Fixes: c891c24b1a ("os-posix: asynchronous teardown for shutdown on Linux") -Signed-off-by: Claudio Imbrenda -Message-Id: <20230505120051.36605-2-imbrenda@linux.ibm.com> -[thuth: Add curly braces to fix error with GCC 8.5, fix bug in deprecated.rst] -Signed-off-by: Thomas Huth - -(cherry picked from commit 80bd81cadd127c1e2fc784612a52abe392670ba4) -Conflicts: - docs/about/deprecated.rst (missing context from other patches) -Signed-off-by: Thomas Huth ---- - docs/about/deprecated.rst | 5 +++++ - os-posix.c | 14 ++++++++++++++ - qemu-options.hx | 34 +++++++++++++++++++++++----------- - util/async-teardown.c | 21 +++++++++++++++++++++ - 4 files changed, 63 insertions(+), 11 deletions(-) - -diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst -index 1ca9dc33d6..52893fcf38 100644 ---- a/docs/about/deprecated.rst -+++ b/docs/about/deprecated.rst -@@ -111,6 +111,11 @@ Use ``-machine acpi=off`` instead. - The HAXM project has been retired (see https://github.com/intel/haxm#status). - Use "whpx" (on Windows) or "hvf" (on macOS) instead. - -+``-async-teardown`` (since 8.1) -+''''''''''''''''''''''''''''''' -+ -+Use ``-run-with async-teardown=on`` instead. 
-+ - - QEMU Machine Protocol (QMP) commands - ------------------------------------ -diff --git a/os-posix.c b/os-posix.c -index 5adc69f560..90ea71725f 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -36,6 +36,8 @@ - #include "qemu/log.h" - #include "sysemu/runstate.h" - #include "qemu/cutils.h" -+#include "qemu/config-file.h" -+#include "qemu/option.h" - - #ifdef CONFIG_LINUX - #include -@@ -152,9 +154,21 @@ int os_parse_cmd_args(int index, const char *optarg) - daemonize = 1; - break; - #if defined(CONFIG_LINUX) -+ /* deprecated */ - case QEMU_OPTION_asyncteardown: - init_async_teardown(); - break; -+ case QEMU_OPTION_run_with: { -+ QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), -+ optarg, false); -+ if (!opts) { -+ exit(1); -+ } -+ if (qemu_opt_get_bool(opts, "async-teardown", false)) { -+ init_async_teardown(); -+ } -+ break; -+ } - #endif - default: - return -1; -diff --git a/qemu-options.hx b/qemu-options.hx -index 52b49f1f6a..b18f933703 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -4766,20 +4766,32 @@ DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL) - DEF("async-teardown", 0, QEMU_OPTION_asyncteardown, - "-async-teardown enable asynchronous teardown\n", - QEMU_ARCH_ALL) --#endif - SRST - ``-async-teardown`` -- Enable asynchronous teardown. A new process called "cleanup/" -- will be created at startup sharing the address space with the main qemu -- process, using clone. It will wait for the main qemu process to -- terminate completely, and then exit. -- This allows qemu to terminate very quickly even if the guest was -- huge, leaving the teardown of the address space to the cleanup -- process. Since the cleanup process shares the same cgroups as the -- main qemu process, accounting is performed correctly. This only -- works if the cleanup process is not forcefully killed with SIGKILL -- before the main qemu process has terminated completely. -+ This option is deprecated and should no longer be used. 
The new option -+ ``-run-with async-teardown=on`` is a replacement. - ERST -+DEF("run-with", HAS_ARG, QEMU_OPTION_run_with, -+ "-run-with async-teardown[=on|off]\n" -+ " misc QEMU process lifecycle options\n" -+ " async-teardown=on enables asynchronous teardown\n", -+ QEMU_ARCH_ALL) -+SRST -+``-run-with`` -+ Set QEMU process lifecycle options. -+ -+ ``async-teardown=on`` enables asynchronous teardown. A new process called -+ "cleanup/" will be created at startup sharing the address -+ space with the main QEMU process, using clone. It will wait for the -+ main QEMU process to terminate completely, and then exit. This allows -+ QEMU to terminate very quickly even if the guest was huge, leaving the -+ teardown of the address space to the cleanup process. Since the cleanup -+ process shares the same cgroups as the main QEMU process, accounting is -+ performed correctly. This only works if the cleanup process is not -+ forcefully killed with SIGKILL before the main QEMU process has -+ terminated completely. 
-+ERST -+#endif - - DEF("msg", HAS_ARG, QEMU_OPTION_msg, - "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n" -diff --git a/util/async-teardown.c b/util/async-teardown.c -index 62cdeb0f20..3ab19c8740 100644 ---- a/util/async-teardown.c -+++ b/util/async-teardown.c -@@ -12,6 +12,9 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/config-file.h" -+#include "qemu/option.h" -+#include "qemu/module.h" - #include - #include - #include -@@ -144,3 +147,21 @@ void init_async_teardown(void) - clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); - sigprocmask(SIG_SETMASK, &old_signals, NULL); - } -+ -+static QemuOptsList qemu_run_with_opts = { -+ .name = "run-with", -+ .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), -+ .desc = { -+ { -+ .name = "async-teardown", -+ .type = QEMU_OPT_BOOL, -+ }, -+ { /* end of list */ } -+ }, -+}; -+ -+static void register_teardown(void) -+{ -+ qemu_add_opts(&qemu_run_with_opts); -+} -+opts_init(register_teardown); --- -2.39.3 - diff --git a/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch b/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch deleted file mode 100644 index fe68d18..0000000 --- a/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 64652225695c23855cfb1252cea2b55c24da2260 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:15 +0200 -Subject: [PATCH 1/9] util/iov: Make qiov_slice() public - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/5] 9c3cd661f7139ce124ee4f4d5fcbeaf3dbb9c45c (hreitz/qemu-kvm-c-9-s) - -We want to inline qemu_iovec_init_extended() in block/io.c for padding -requests, and having access to qiov_slice() is useful for this. As a -public function, it is renamed to qemu_iovec_slice(). - -(We will need to count the number of I/O vector elements of a slice -there, and then later process this slice. 
Without qiov_slice(), we -would need to call qemu_iovec_subvec_niov(), and all further -IOV-processing functions may need to skip prefixing elements to -accomodate for a qiov_offset. Because qemu_iovec_subvec_niov() -internally calls qiov_slice(), we can just have the block/io.c code call -qiov_slice() itself, thus get the number of elements, and also create an -iovec array with the superfluous prefixing elements stripped, so the -following processing functions no longer need to skip them.) - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-2-hreitz@redhat.com> -(cherry picked from commit 3d06cea8256d54a6b0238934c31012f7f17100f5) -Signed-off-by: Hanna Czenczek ---- - include/qemu/iov.h | 3 +++ - util/iov.c | 14 +++++++------- - 2 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/include/qemu/iov.h b/include/qemu/iov.h -index 9330746680..46fadfb27a 100644 ---- a/include/qemu/iov.h -+++ b/include/qemu/iov.h -@@ -229,6 +229,9 @@ int qemu_iovec_init_extended( - void *tail_buf, size_t tail_len); - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len); -+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -+ size_t offset, size_t len, -+ size_t *head, size_t *tail, int *niov); - int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); - void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); - void qemu_iovec_concat(QEMUIOVector *dst, -diff --git a/util/iov.c b/util/iov.c -index b4be580022..65a70449da 100644 ---- a/util/iov.c -+++ b/util/iov.c -@@ -378,15 +378,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, - } - - /* -- * qiov_slice -+ * qemu_iovec_slice - * - * Find subarray of iovec's, containing requested range. @head would - * be offset in first iov (returned by the function), @tail would be - * count of extra bytes in last iovec (returned iov + @niov - 1). 
- */ --static struct iovec *qiov_slice(QEMUIOVector *qiov, -- size_t offset, size_t len, -- size_t *head, size_t *tail, int *niov) -+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -+ size_t offset, size_t len, -+ size_t *head, size_t *tail, int *niov) - { - struct iovec *iov, *end_iov; - -@@ -411,7 +411,7 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) - size_t head, tail; - int niov; - -- qiov_slice(qiov, offset, len, &head, &tail, &niov); -+ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov); - - return niov; - } -@@ -439,8 +439,8 @@ int qemu_iovec_init_extended( - } - - if (mid_len) { -- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, -- &mid_head, &mid_tail, &mid_niov); -+ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, -+ &mid_head, &mid_tail, &mid_niov); - } - - total_niov = !!head_len + mid_niov + !!tail_len; --- -2.39.3 - diff --git a/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch b/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch deleted file mode 100644 index fd21880..0000000 --- a/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 8ff973985a04fec1a3cdf886976a03e0dca7b0ea Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:17 +0200 -Subject: [PATCH 3/9] util/iov: Remove qemu_iovec_init_extended() - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/5] 1740d7b15ea4fbfbe71e7adc122741e85e83fb8c (hreitz/qemu-kvm-c-9-s) - -bdrv_pad_request() was the main user of qemu_iovec_init_extended(). -HEAD^ has removed that use, so we can remove qemu_iovec_init_extended() -now. - -The only remaining user is qemu_iovec_init_slice(), which can easily -inline the small part it really needs. - -Note that qemu_iovec_init_extended() offered a memcpy() optimization to -initialize the new I/O vector. 
qemu_iovec_concat_iov(), which is used -to replace its functionality, does not, but calls qemu_iovec_add() for -every single element. If we decide this optimization was important, we -will need to re-implement it in qemu_iovec_concat_iov(), which might -also benefit its pre-existing users. - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-4-hreitz@redhat.com> -(cherry picked from commit cc63f6f6fa1aaa4b6405dd69432c693e9c8d18ca) -Signed-off-by: Hanna Czenczek ---- - include/qemu/iov.h | 5 --- - util/iov.c | 79 +++++++--------------------------------------- - 2 files changed, 11 insertions(+), 73 deletions(-) - -diff --git a/include/qemu/iov.h b/include/qemu/iov.h -index 46fadfb27a..63a1c01965 100644 ---- a/include/qemu/iov.h -+++ b/include/qemu/iov.h -@@ -222,11 +222,6 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) - - void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); - void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); --int qemu_iovec_init_extended( -- QEMUIOVector *qiov, -- void *head_buf, size_t head_len, -- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, -- void *tail_buf, size_t tail_len); - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len); - struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -diff --git a/util/iov.c b/util/iov.c -index 65a70449da..866fb577f3 100644 ---- a/util/iov.c -+++ b/util/iov.c -@@ -416,70 +416,6 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) - return niov; - } - --/* -- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, -- * and @tail_buf buffer into new qiov. 
-- */ --int qemu_iovec_init_extended( -- QEMUIOVector *qiov, -- void *head_buf, size_t head_len, -- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, -- void *tail_buf, size_t tail_len) --{ -- size_t mid_head, mid_tail; -- int total_niov, mid_niov = 0; -- struct iovec *p, *mid_iov = NULL; -- -- assert(mid_qiov->niov <= IOV_MAX); -- -- if (SIZE_MAX - head_len < mid_len || -- SIZE_MAX - head_len - mid_len < tail_len) -- { -- return -EINVAL; -- } -- -- if (mid_len) { -- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, -- &mid_head, &mid_tail, &mid_niov); -- } -- -- total_niov = !!head_len + mid_niov + !!tail_len; -- if (total_niov > IOV_MAX) { -- return -EINVAL; -- } -- -- if (total_niov == 1) { -- qemu_iovec_init_buf(qiov, NULL, 0); -- p = &qiov->local_iov; -- } else { -- qiov->niov = qiov->nalloc = total_niov; -- qiov->size = head_len + mid_len + tail_len; -- p = qiov->iov = g_new(struct iovec, qiov->niov); -- } -- -- if (head_len) { -- p->iov_base = head_buf; -- p->iov_len = head_len; -- p++; -- } -- -- assert(!mid_niov == !mid_len); -- if (mid_niov) { -- memcpy(p, mid_iov, mid_niov * sizeof(*p)); -- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; -- p[0].iov_len -= mid_head; -- p[mid_niov - 1].iov_len -= mid_tail; -- p += mid_niov; -- } -- -- if (tail_len) { -- p->iov_base = tail_buf; -- p->iov_len = tail_len; -- } -- -- return 0; --} -- - /* - * Check if the contents of subrange of qiov data is all zeroes. 
- */ -@@ -511,14 +447,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len) - { -- int ret; -+ struct iovec *slice_iov; -+ int slice_niov; -+ size_t slice_head, slice_tail; - - assert(source->size >= len); - assert(source->size - len >= offset); - -- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */ -- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); -- assert(ret == 0); -+ slice_iov = qemu_iovec_slice(source, offset, len, -+ &slice_head, &slice_tail, &slice_niov); -+ if (slice_niov == 1) { -+ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len); -+ } else { -+ qemu_iovec_init(qiov, slice_niov); -+ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len); -+ } - } - - void qemu_iovec_destroy(QEMUIOVector *qiov) --- -2.39.3 - diff --git a/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch b/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch deleted file mode 100644 index b0e66f6..0000000 --- a/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 439a8cdd010dfd253fc2277ae4ec605b5ba621d9 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:36 -0400 -Subject: [PATCH 02/56] util/mmap-alloc: qemu_fd_getfs() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/50] 8970b5ae611a933d693e0c90cbf4eda073635494 (peterx/qemu-kvm) - -This new helper fetches file system type for a fd. Only Linux is -implemented so far. Currently only tmpfs and hugetlbfs are defined, -but it can grow as needed. 
- -Signed-off-by: Peter Xu -Reviewed-by: David Hildenbrand -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit fa45f8dab9613993c042176ea2d25552bfebc955) -Signed-off-by: Peter Xu ---- - include/qemu/mmap-alloc.h | 7 +++++++ - util/mmap-alloc.c | 28 ++++++++++++++++++++++++++++ - 2 files changed, 35 insertions(+) - -diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h -index 2825e231a7..8344daaa03 100644 ---- a/include/qemu/mmap-alloc.h -+++ b/include/qemu/mmap-alloc.h -@@ -1,8 +1,15 @@ - #ifndef QEMU_MMAP_ALLOC_H - #define QEMU_MMAP_ALLOC_H - -+typedef enum { -+ QEMU_FS_TYPE_UNKNOWN = 0, -+ QEMU_FS_TYPE_TMPFS, -+ QEMU_FS_TYPE_HUGETLBFS, -+ QEMU_FS_TYPE_NUM, -+} QemuFsType; - - size_t qemu_fd_getpagesize(int fd); -+QemuFsType qemu_fd_getfs(int fd); - - /** - * qemu_ram_mmap: mmap anonymous memory, the specified file or device. -diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c -index 5ed7d29183..ed14f9c64d 100644 ---- a/util/mmap-alloc.c -+++ b/util/mmap-alloc.c -@@ -27,8 +27,36 @@ - - #ifdef CONFIG_LINUX - #include -+#include - #endif - -+QemuFsType qemu_fd_getfs(int fd) -+{ -+#ifdef CONFIG_LINUX -+ struct statfs fs; -+ int ret; -+ -+ if (fd < 0) { -+ return QEMU_FS_TYPE_UNKNOWN; -+ } -+ -+ do { -+ ret = fstatfs(fd, &fs); -+ } while (ret != 0 && errno == EINTR); -+ -+ switch (fs.f_type) { -+ case TMPFS_MAGIC: -+ return QEMU_FS_TYPE_TMPFS; -+ case HUGETLBFS_MAGIC: -+ return QEMU_FS_TYPE_HUGETLBFS; -+ default: -+ return QEMU_FS_TYPE_UNKNOWN; -+ } -+#else -+ return QEMU_FS_TYPE_UNKNOWN; -+#endif -+} -+ - size_t qemu_fd_getpagesize(int fd) - { - #ifdef CONFIG_LINUX --- -2.39.1 - diff --git a/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch b/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch deleted file mode 100644 index 4e492d9..0000000 --- a/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch +++ /dev/null @@ -1,82 +0,0 @@ -From fb2d40cc84f689e46138a81c57ccd1f234dbbb7c Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 07/37] util/vfio-helpers: Use g_file_read_link() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/28] 3545a07c967782dba8dd081415232f91d3f600a9 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit dbdea0dbfe2c -Author: Akihiko Odaki -Date: Tue May 23 11:39:12 2023 +0900 - - util/vfio-helpers: Use g_file_read_link() - - When _FORTIFY_SOURCE=2, glibc version is 2.35, and GCC version is - 12.1.0, the compiler complains as follows: - - In file included from /usr/include/features.h:490, - from /usr/include/bits/libc-header-start.h:33, - from /usr/include/stdint.h:26, - from /usr/lib/gcc/aarch64-unknown-linux-gnu/12.1.0/include/stdint.h:9, - from /home/alarm/q/var/qemu/include/qemu/osdep.h:94, - from ../util/vfio-helpers.c:13: - In function 'readlink', - inlined from 'sysfs_find_group_file' at ../util/vfio-helpers.c:116:9, - inlined from 'qemu_vfio_init_pci' at ../util/vfio-helpers.c:326:18, - inlined from 'qemu_vfio_open_pci' at ../util/vfio-helpers.c:517:9: - /usr/include/bits/unistd.h:119:10: error: argument 2 is null but the corresponding size argument 3 value is 4095 [-Werror=nonnull] - 119 | return __glibc_fortify (readlink, __len, sizeof (char), - | ^~~~~~~~~~~~~~~ - - This error implies the allocated buffer can be NULL. Use - g_file_read_link(), which allocates buffer automatically to avoid the - error. 
- - Signed-off-by: Akihiko Odaki - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - util/vfio-helpers.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c -index 2d8af38f88..f8bab46c68 100644 ---- a/util/vfio-helpers.c -+++ b/util/vfio-helpers.c -@@ -106,15 +106,17 @@ struct QEMUVFIOState { - */ - static char *sysfs_find_group_file(const char *device, Error **errp) - { -+ g_autoptr(GError) gerr = NULL; - char *sysfs_link; - char *sysfs_group; - char *p; - char *path = NULL; - - sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device); -- sysfs_group = g_malloc0(PATH_MAX); -- if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) { -- error_setg_errno(errp, errno, "Failed to find iommu group sysfs path"); -+ sysfs_group = g_file_read_link(sysfs_link, &gerr); -+ if (gerr) { -+ error_setg(errp, "Failed to find iommu group sysfs path: %s", -+ gerr->message); - goto out; - } - p = strrchr(sysfs_group, '/'); --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch b/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch deleted file mode 100644 index 56b9aed..0000000 --- a/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 74c2f378bdf278a03c02ae48948b00b4431a3fd6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 2 Jun 2023 16:38:52 +0200 -Subject: [PATCH 6/9] vdpa: do not block migration if device has cvq and - x-svq=on -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 190: vdpa: do not block migration if device has cvq and x-svq=on -RH-Jira: RHEL-573 -RH-Acked-by: Jason Wang -RH-Acked-by: Laurent Vivier -RH-Commit: [1/1] 
b0e2ec3c9e5c17252cf6a043fe1374ddc3c37de7 (eperezmartin/qemu-kvm) - -It was a mistake to forbid in all cases, as SVQ is already able to send -all the CVQ messages before start forwarding data vqs. It actually -caused a regression, making impossible to migrate device previously -migratable. - -Fixes: 36e4647247f2 ("vdpa: add vhost_vdpa_net_valid_svq_features") -Signed-off-by: Eugenio Pérez -Message-Id: <20230602143854.1879091-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Tested-by: Lei Yang -(cherry picked from commit 8bc0049eadafb984d305c847cedff550b58e5fc0) -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 8c8900f0f4..1ae839da34 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -844,13 +844,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.shadow_vq_ops_opaque = s; - - /* -- * TODO: We cannot migrate devices with CVQ as there is no way to set -- * the device state (MAC, MQ, etc) before starting the datapath. -+ * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -+ * there is no way to set the device state (MAC, MQ, etc) before -+ * starting the datapath. - * - * Migration blocker ownership now belongs to s->vhost_vdpa. 
- */ -- error_setg(&s->vhost_vdpa.migration_blocker, -- "net vdpa cannot migrate with CVQ feature"); -+ if (!svq) { -+ error_setg(&s->vhost_vdpa.migration_blocker, -+ "net vdpa cannot migrate with CVQ feature"); -+ } - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch b/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch deleted file mode 100644 index 1ab8f02..0000000 --- a/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 636eb63cbf23b31fc9880528490ac4bef680305b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 25 Jan 2023 08:47:34 +0100 -Subject: [PATCH 4/7] vdpa: export vhost_vdpa_set_vring_ready -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [4/7] 8d1fecec7a993b8b68e268e8783c200c158f5ee0 (eperezmartin/qemu-kvm) - -The vhost-vdpa net backend needs to enable vrings in a different order -than default, so export it. - -No functional change intended except for tracing, that now includes the -(virtio) index being enabled and the return value of the ioctl. - -Still ignoring return value of this function if called from -vhost_vdpa_dev_start, as reorganize calling code around it is out of -the scope of this series. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - hw/virtio/trace-events | 2 +- - hw/virtio/vhost-vdpa.c | 25 +++++++++++++------------ - include/hw/virtio/vhost-vdpa.h | 1 + - 3 files changed, 15 insertions(+), 13 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 300dec8d3e..85b43cd8fe 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -48,7 +48,7 @@ vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI - vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 - vhost_vdpa_reset_device(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 - vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" --vhost_vdpa_set_vring_ready(void *dev) "dev: %p" -+vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d" - vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" - vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 - vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index c04f14420d..e4d0101327 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -733,18 +733,17 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) - return idx; - } - --static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) -+int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) - { -- int i; -- trace_vhost_vdpa_set_vring_ready(dev); -- for (i = 0; i < dev->nvqs; ++i) { -- struct vhost_vring_state state = { -- .index = dev->vq_index + i, -- .num = 1, -- }; -- vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); -- } -- return 0; -+ struct vhost_dev *dev = v->dev; -+ struct vhost_vring_state state = { -+ .index = idx, -+ .num = 1, -+ }; -+ int r = 
vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); -+ -+ trace_vhost_vdpa_set_vring_ready(dev, idx, r); -+ return r; - } - - static int vhost_vdpa_set_config_call(struct vhost_dev *dev, -@@ -1155,7 +1154,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - if (unlikely(!ok)) { - return -1; - } -- vhost_vdpa_set_vring_ready(dev); -+ for (int i = 0; i < dev->nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(v, dev->vq_index + i); -+ } - } else { - vhost_vdpa_suspend(dev); - vhost_vdpa_svqs_stop(dev); -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index c278a2a8de..540642d304 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -55,6 +55,7 @@ typedef struct vhost_vdpa { - } VhostVDPA; - - int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); -+int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); - - int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, - hwaddr size, void *vaddr, bool readonly); --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch b/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch deleted file mode 100644 index a37612c..0000000 --- a/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch +++ /dev/null @@ -1,286 +0,0 @@ -From 1609e47511c9a02b26e0023ff6e1e999d7cdf179 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 26 May 2023 17:31:43 +0200 -Subject: [PATCH 2/7] vdpa: move CVQ isolation check to net_init_vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [2/7] caed8f81c3e30e6147817e7f43225aa3ee90ff37 (eperezmartin/qemu-kvm) - -Evaluating it at start time instead of initialization time may make 
the -guest capable of dynamically adding or removing migration blockers. - -Also, moving to initialization reduces the number of ioctls in the -migration, reducing failure possibilities. - -As a drawback we need to check for CVQ isolation twice: one time with no -MQ negotiated and another one acking it, as long as the device supports -it. This is because Vring ASID / group management is based on vq -indexes, but we don't know the index of CVQ before negotiating MQ. - -Signed-off-by: Eugenio Pérez -Message-Id: <20230526153143.470745-3-eperezma@redhat.com> -Tested-by: Lei Yang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 155 ++++++++++++++++++++++++++++++++++------------- - 1 file changed, 112 insertions(+), 43 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 801d4e0422..ce17e4416a 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -43,6 +43,10 @@ typedef struct VhostVDPAState { - - /* The device always have SVQ enabled */ - bool always_svq; -+ -+ /* The device can isolate CVQ in its own ASID */ -+ bool cvq_isolated; -+ - bool started; - } VhostVDPAState; - -@@ -369,15 +373,8 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - --/** -- * Get vring virtqueue group -- * -- * @device_fd vdpa device fd -- * @vq_index Virtqueue index -- * -- * Return -errno in case of error, or vq group if success. 
-- */ --static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) -+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, -+ Error **errp) - { - struct vhost_vring_state state = { - .index = vq_index, -@@ -386,8 +383,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - - if (unlikely(r < 0)) { - r = -errno; -- error_report("Cannot get VQ %u group: %s", vq_index, -- g_strerror(errno)); -+ error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); - return r; - } - -@@ -487,9 +483,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { - VhostVDPAState *s, *s0; - struct vhost_vdpa *v; -- uint64_t backend_features; - int64_t cvq_group; -- int cvq_index, r; -+ int r; -+ Error *err = NULL; - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - -@@ -509,41 +505,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - /* - * If we early return in these cases SVQ will not be enabled. The migration - * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. -- * -- * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev -- * yet. - */ -- r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -- if (unlikely(r < 0)) { -- error_report("Cannot get vdpa backend_features: %s(%d)", -- g_strerror(errno), errno); -- return -1; -+ if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ return 0; - } -- if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || -- !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ -+ if (!s->cvq_isolated) { - return 0; - } - -- /* -- * Check if all the virtqueues of the virtio device are in a different vq -- * than the last vq. VQ group of last group passed in cvq_group. 
-- */ -- cvq_index = v->dev->vq_index_end - 1; -- cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); -+ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, -+ v->dev->vq_index_end - 1, -+ &err); - if (unlikely(cvq_group < 0)) { -+ error_report_err(err); - return cvq_group; - } -- for (int i = 0; i < cvq_index; ++i) { -- int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); -- -- if (unlikely(group < 0)) { -- return group; -- } -- -- if (group == cvq_group) { -- return 0; -- } -- } - - r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); - if (unlikely(r < 0)) { -@@ -806,6 +783,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { - .avail_handler = vhost_vdpa_net_handle_ctrl_avail, - }; - -+/** -+ * Probe if CVQ is isolated -+ * -+ * @device_fd The vdpa device fd -+ * @features Features offered by the device. -+ * @cvq_index The control vq pair index -+ * -+ * Returns <0 in case of failure, 0 if false and 1 if true. -+ */ -+static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, -+ int cvq_index, Error **errp) -+{ -+ uint64_t backend_features; -+ int64_t cvq_group; -+ uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | -+ VIRTIO_CONFIG_S_DRIVER | -+ VIRTIO_CONFIG_S_FEATURES_OK; -+ int r; -+ -+ ERRP_GUARD(); -+ -+ r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -+ if (unlikely(r < 0)) { -+ error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); -+ return r; -+ } -+ -+ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { -+ return 0; -+ } -+ -+ r = ioctl(device_fd, VHOST_SET_FEATURES, &features); -+ if (unlikely(r)) { -+ error_setg_errno(errp, errno, "Cannot set features"); -+ } -+ -+ r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); -+ if (unlikely(r)) { -+ error_setg_errno(errp, -r, "Cannot set device features"); -+ goto out; -+ } -+ -+ cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); -+ if (unlikely(cvq_group < 0)) { -+ if 
(cvq_group != -ENOTSUP) { -+ r = cvq_group; -+ goto out; -+ } -+ -+ /* -+ * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend -+ * support ASID even if the parent driver does not. The CVQ cannot be -+ * isolated in this case. -+ */ -+ error_free(*errp); -+ *errp = NULL; -+ r = 0; -+ goto out; -+ } -+ -+ for (int i = 0; i < cvq_index; ++i) { -+ int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); -+ if (unlikely(group < 0)) { -+ r = group; -+ goto out; -+ } -+ -+ if (group == (int64_t)cvq_group) { -+ r = 0; -+ goto out; -+ } -+ } -+ -+ r = 1; -+ -+out: -+ status = 0; -+ ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); -+ return r; -+} -+ - static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - const char *device, - const char *name, -@@ -815,16 +873,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - bool is_datapath, - bool svq, - struct vhost_vdpa_iova_range iova_range, -- uint64_t features) -+ uint64_t features, -+ Error **errp) - { - NetClientState *nc = NULL; - VhostVDPAState *s; - int ret = 0; - assert(name); -+ int cvq_isolated; -+ - if (is_datapath) { - nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, - name); - } else { -+ cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, -+ queue_pair_index * 2, -+ errp); -+ if (unlikely(cvq_isolated < 0)) { -+ return NULL; -+ } -+ - nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, - device, name); - } -@@ -851,6 +919,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; -+ s->cvq_isolated = cvq_isolated; - - /* - * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -@@ -982,7 +1051,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true, 
opts->x_svq, -- iova_range, features); -+ iova_range, features, errp); - if (!ncs[i]) - goto err; - } -@@ -990,7 +1059,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false, -- opts->x_svq, iova_range, features); -+ opts->x_svq, iova_range, features, errp); - if (!nc) - goto err; - } --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch b/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch deleted file mode 100644 index 4ebd8bd..0000000 --- a/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 09bf0febef2512f00e71edca0fcbaf452652c2c7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 10 Aug 2023 11:27:28 +0200 -Subject: [PATCH 6/7] vdpa: move vhost_vdpa_set_vring_ready to the caller -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [6/7] cf4fd1071ca127914c8e8d6aefec451cad97ecc1 (eperezmartin/qemu-kvm) - -Doing that way allows CVQ to be enabled before the dataplane vqs, -restoring the state as MQ or MAC addresses properly in the case of a -migration. - -The patch does it by defining a ->load NetClientInfo callback also for -dataplane. Ideally, this should be done by an independent patch, but -the function is already static so it would only add an empty -vhost_vdpa_net_data_load stub. 
- -Signed-off-by: Eugenio Pérez ---- -v3: -* Fix subject typo -* Expand patch message so it explains why ---- - hw/virtio/vdpa-dev.c | 3 +++ - hw/virtio/vhost-vdpa.c | 3 --- - net/vhost-vdpa.c | 41 +++++++++++++++++++++++++++++++---------- - 3 files changed, 34 insertions(+), 13 deletions(-) - -diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c -index 01b41eb0f1..8c47d643bf 100644 ---- a/hw/virtio/vdpa-dev.c -+++ b/hw/virtio/vdpa-dev.c -@@ -256,6 +256,9 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) - error_setg_errno(errp, -ret, "Error starting vhost"); - goto err_guest_notifiers; - } -+ for (i = 0; i < s->dev.nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(&s->vdpa, i); -+ } - s->started = true; - - /* -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e4d0101327..0d9d311abd 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1154,9 +1154,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - if (unlikely(!ok)) { - return -1; - } -- for (int i = 0; i < dev->nvqs; ++i) { -- vhost_vdpa_set_vring_ready(v, dev->vq_index + i); -- } - } else { - vhost_vdpa_suspend(dev); - vhost_vdpa_svqs_stop(dev); -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index a1b16bbc52..47b87bf80d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -344,6 +344,22 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) - return 0; - } - -+static int vhost_vdpa_net_data_load(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_vdpa *v = &s->vhost_vdpa; -+ bool has_cvq = v->dev->vq_index_end % 2; -+ -+ if (has_cvq) { -+ return 0; -+ } -+ -+ for (int i = 0; i < v->dev->nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); -+ } -+ return 0; -+} -+ - static void vhost_vdpa_net_client_stop(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -366,6 +382,7 @@ static NetClientInfo net_vhost_vdpa_info = { - .size = 
sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_data_start, -+ .load = vhost_vdpa_net_data_load, - .stop = vhost_vdpa_net_client_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, -@@ -682,18 +699,22 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - -- if (!v->shadow_vqs_enabled) { -- return 0; -- } -+ vhost_vdpa_set_vring_ready(v, v->dev->vq_index); - -- n = VIRTIO_NET(v->dev->vdev); -- r = vhost_vdpa_net_load_mac(s, n); -- if (unlikely(r < 0)) { -- return r; -+ if (v->shadow_vqs_enabled) { -+ n = VIRTIO_NET(v->dev->vdev); -+ r = vhost_vdpa_net_load_mac(s, n); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ r = vhost_vdpa_net_load_mq(s, n); -+ if (unlikely(r)) { -+ return r; -+ } - } -- r = vhost_vdpa_net_load_mq(s, n); -- if (unlikely(r)) { -- return r; -+ -+ for (int i = 0; i < v->dev->vq_index; ++i) { -+ vhost_vdpa_set_vring_ready(v, i); - } - - return 0; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch b/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch deleted file mode 100644 index 9388d75..0000000 --- a/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 46d5b861a39b7d0d3222162e6b7707526c131230 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 24 Mar 2023 13:28:15 +0100 -Subject: [PATCH 7/7] vdpa: remove net cvq migration blocker -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [7/7] 9542e305c7ea3a47e0f1fe0629281238b0bb2111 (eperezmartin/qemu-kvm) - -Now that we have add migration blockers if the device does not support -all the needed features, remove the general blocker applied to all net -devices with CVQ. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 12 ------------ - 1 file changed, 12 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 47b87bf80d..6e03db4afa 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -941,18 +941,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; - s->cvq_isolated = cvq_isolated; -- -- /* -- * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -- * there is no way to set the device state (MAC, MQ, etc) before -- * starting the datapath. -- * -- * Migration blocker ownership now belongs to s->vhost_vdpa. -- */ -- if (!svq) { -- error_setg(&s->vhost_vdpa.migration_blocker, -- "net vdpa cannot migrate with CVQ feature"); -- } - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch b/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch deleted file mode 100644 index 15dc410..0000000 --- a/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch +++ /dev/null @@ -1,49 +0,0 @@ -From db7ca7692e264e8bf1bd9e08e3de7a92fc76a363 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 9 Aug 2023 18:07:26 +0200 -Subject: [PATCH 5/7] vdpa: rename vhost_vdpa_net_load to - vhost_vdpa_net_cvq_load -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [5/7] aea91f3274786665725af892eb905818eb0f44f1 (eperezmartin/qemu-kvm) - -Next patches will add the corresponding data load. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 29d3fd3ca6..a1b16bbc52 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -673,7 +673,7 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, - return *s->status != VIRTIO_NET_OK; - } - --static int vhost_vdpa_net_load(NetClientState *nc) -+static int vhost_vdpa_net_cvq_load(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_vdpa *v = &s->vhost_vdpa; -@@ -704,7 +704,7 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_cvq_start, -- .load = vhost_vdpa_net_load, -+ .load = vhost_vdpa_net_cvq_load, - .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch b/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch deleted file mode 100644 index c8b4913..0000000 --- a/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 09583f39d51d16079c9fda32545d7a44b6f5c8c6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 26 May 2023 17:31:42 +0200 -Subject: [PATCH 1/7] vdpa: return errno in vhost_vdpa_get_vring_group error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [1/7] 89745b1828a1af535c40657022d385250688d11d (eperezmartin/qemu-kvm) - -We need to tell in the caller, as some errors are expected in a normal -workflow. 
In particular, parent drivers in recent kernels with -VHOST_BACKEND_F_IOTLB_ASID may not support vring groups. In that case, --ENOTSUP is returned. - -This is the case of vp_vdpa in Linux 6.2. - -Next patches in this series will use that information to know if it must -abort or not. Also, next patches return properly an errp instead of -printing with error_report. - -Reviewed-by: Stefano Garzarella -Acked-by: Jason Wang -Signed-off-by: Eugenio Pérez -Message-Id: <20230526153143.470745-2-eperezma@redhat.com> -Tested-by: Lei Yang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - net/vhost-vdpa.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1ae839da34..801d4e0422 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -369,6 +369,14 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - -+/** -+ * Get vring virtqueue group -+ * -+ * @device_fd vdpa device fd -+ * @vq_index Virtqueue index -+ * -+ * Return -errno in case of error, or vq group if success. 
-+ */ - static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - { - struct vhost_vring_state state = { -@@ -377,6 +385,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); - - if (unlikely(r < 0)) { -+ r = -errno; - error_report("Cannot get VQ %u group: %s", vq_index, - g_strerror(errno)); - return r; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch b/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch deleted file mode 100644 index bfb1b8e..0000000 --- a/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 726662aee0bc295f6931b7aba1bd68f033e949aa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 10 Aug 2023 16:08:18 +0200 -Subject: [PATCH 3/7] vdpa: use first queue SVQ state for CVQ default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [3/7] 5c98f11b5080552a62c8e37ff2c23339455b7b86 (eperezmartin/qemu-kvm) - -Previous to this patch the only way CVQ would be shadowed is if it does -support to isolate CVQ group or if all vqs were shadowed from the -beginning. The second condition was checked at the beginning, and no -more configuration was done. - -After this series we need to check if data queues are shadowed because -they are in the middle of the migration. As checking if they are -shadowed already covers the previous case, let's just mimic it. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index ce17e4416a..29d3fd3ca6 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -494,7 +494,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - - s0 = vhost_vdpa_net_first_nc_vdpa(s); - v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; -- v->shadow_vqs_enabled = s->always_svq; -+ v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; - s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; - - if (s->vhost_vdpa.shadow_data) { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch b/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch deleted file mode 100644 index 1e00427..0000000 --- a/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 97124d4f2afbc8e65a3ecf76096e6b34a9b71541 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 30/37] vfio: Fix null pointer dereference bug in - vfio_bars_finalize() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [28/28] 4bbdf7f9c5595897244c6cc3d88d487dd5f99bf0 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8af87a3ec7e4 -Author: Avihai Horon -Date: Tue Jul 4 16:39:27 2023 +0300 - - vfio: Fix null pointer dereference bug in vfio_bars_finalize() - - vfio_realize() has the following flow: - 1. vfio_bars_prepare() -- sets VFIOBAR->size. - 2. msix_early_setup(). - 3. vfio_bars_register() -- allocates VFIOBAR->mr. - - After vfio_bars_prepare() is called msix_early_setup() can fail. 
If it - does fail, vfio_bars_register() is never called and VFIOBAR->mr is not - allocated. - - In this case, vfio_bars_finalize() is called as part of the error flow - to free the bars' resources. However, vfio_bars_finalize() calls - object_unparent() for VFIOBAR->mr after checking only VFIOBAR->size, and - thus we get a null pointer dereference. - - Fix it by checking VFIOBAR->mr in vfio_bars_finalize(). - - Fixes: 89d5202edc50 ("vfio/pci: Allow relocating MSI-X MMIO") - Signed-off-by: Avihai Horon - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index ba40ca8784..9189459a38 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -1755,9 +1755,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) - - vfio_bar_quirk_finalize(vdev, i); - vfio_region_finalize(&bar->region); -- if (bar->size) { -+ if (bar->mr) { -+ assert(bar->size); - object_unparent(OBJECT(bar->mr)); - g_free(bar->mr); -+ bar->mr = NULL; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch b/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch deleted file mode 100644 index 78a554d..0000000 --- a/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch +++ /dev/null @@ -1,196 +0,0 @@ -From f68e8c5d841cd7fc785cc3d15b3c280211bfb4c3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 17/37] vfio: Implement a common device info helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [15/28] 
9cfd233ab1b95dc7de776e8ef901823bd37c5a6b (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 634f38f0f73f -Author: Alex Williamson -Date: Thu Jun 1 08:45:06 2023 -0600 - - vfio: Implement a common device info helper - - A common helper implementing the realloc algorithm for handling - capabilities. - - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Signed-off-by: Alex Williamson - Reviewed-by: Robin Voetter - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-vfio.c | 37 ++++------------------------ - hw/vfio/common.c | 46 ++++++++++++++++++++++++++--------- - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 41 insertions(+), 43 deletions(-) - -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index f51190d466..59a2e03873 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -289,38 +289,11 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, - memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); - } - --static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, -- uint32_t argsz) -+static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) - { -- struct vfio_device_info *info = g_malloc0(argsz); -- VFIOPCIDevice *vfio_pci; -- int fd; -+ VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); - -- vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); -- fd = vfio_pci->vbasedev.fd; -- -- /* -- * If the specified argsz is not large enough to contain all capabilities -- * it will be updated upon return from the ioctl. Retry until we have -- * a big enough buffer to hold the entire capability chain. On error, -- * just exit and rely on CLP defaults. 
-- */ --retry: -- info->argsz = argsz; -- -- if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { -- trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); -- g_free(info); -- return NULL; -- } -- -- if (info->argsz > argsz) { -- argsz = info->argsz; -- info = g_realloc(info, argsz); -- goto retry; -- } -- -- return info; -+ return vfio_get_device_info(vfio_pci->vbasedev.fd); - } - - /* -@@ -335,7 +308,7 @@ bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) - - assert(fh); - -- info = get_device_info(pbdev, sizeof(*info)); -+ info = get_device_info(pbdev); - if (!info) { - return false; - } -@@ -356,7 +329,7 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) - { - g_autofree struct vfio_device_info *info = NULL; - -- info = get_device_info(pbdev, sizeof(*info)); -+ info = get_device_info(pbdev); - if (!info) { - return; - } -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index b73086e17a..3b4ac53f15 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -2845,11 +2845,35 @@ void vfio_put_group(VFIOGroup *group) - } - } - -+struct vfio_device_info *vfio_get_device_info(int fd) -+{ -+ struct vfio_device_info *info; -+ uint32_t argsz = sizeof(*info); -+ -+ info = g_malloc0(argsz); -+ -+retry: -+ info->argsz = argsz; -+ -+ if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { -+ g_free(info); -+ return NULL; -+ } -+ -+ if (info->argsz > argsz) { -+ argsz = info->argsz; -+ info = g_realloc(info, argsz); -+ goto retry; -+ } -+ -+ return info; -+} -+ - int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp) - { -- struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; -- int ret, fd; -+ g_autofree struct vfio_device_info *info = NULL; -+ int fd; - - fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); - if (fd < 0) { -@@ -2861,11 +2885,11 @@ int vfio_get_device(VFIOGroup *group, const char *name, - return fd; - } - -- ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info); -- if (ret) { -+ info = 
vfio_get_device_info(fd); -+ if (!info) { - error_setg_errno(errp, errno, "error getting device info"); - close(fd); -- return ret; -+ return -1; - } - - /* -@@ -2893,14 +2917,14 @@ int vfio_get_device(VFIOGroup *group, const char *name, - vbasedev->group = group; - QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - -- vbasedev->num_irqs = dev_info.num_irqs; -- vbasedev->num_regions = dev_info.num_regions; -- vbasedev->flags = dev_info.flags; -+ vbasedev->num_irqs = info->num_irqs; -+ vbasedev->num_regions = info->num_regions; -+ vbasedev->flags = info->flags; -+ -+ trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs); - -- trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions, -- dev_info.num_irqs); -+ vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); - -- vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - return 0; - } - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 3dc5f2104c..6d1b8487c3 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -216,6 +216,7 @@ void vfio_region_finalize(VFIORegion *region); - void vfio_reset_handler(void *opaque); - VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); - void vfio_put_group(VFIOGroup *group); -+struct vfio_device_info *vfio_get_device_info(int fd); - int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch b/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch deleted file mode 100644 index b8e72e6..0000000 --- a/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch +++ /dev/null @@ -1,438 +0,0 @@ -From 080d28c191b7d951f1f4596dcaa13d590c07d886 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 15/37] vfio/migration: 
Add VFIO migration pre-copy support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/28] 7b2ea1471440d47e5aed1211c96942ca7bface96 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit eda7362af959 -Author: Avihai Horon -Date: Wed Jun 21 14:12:00 2023 +0300 - - vfio/migration: Add VFIO migration pre-copy support - - Pre-copy support allows the VFIO device data to be transferred while the - VM is running. This helps to accommodate VFIO devices that have a large - amount of data that needs to be transferred, and it can reduce migration - downtime. - - Pre-copy support is optional in VFIO migration protocol v2. - Implement pre-copy of VFIO migration protocol v2 and use it for devices - that support it. Full description of it can be found in the following - Linux commit: 4db52602a607 ("vfio: Extend the device migration protocol - with PRE_COPY"). - - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - docs/devel/vfio-migration.rst | 35 +++++--- - hw/vfio/common.c | 6 +- - hw/vfio/migration.c | 165 ++++++++++++++++++++++++++++++++-- - hw/vfio/trace-events | 4 +- - include/hw/vfio/vfio-common.h | 2 + - 5 files changed, 190 insertions(+), 22 deletions(-) - -diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst -index 1b68ccf115..e896b2a673 100644 ---- a/docs/devel/vfio-migration.rst -+++ b/docs/devel/vfio-migration.rst -@@ -7,12 +7,14 @@ the guest is running on source host and restoring this saved state on the - destination host. This document details how saving and restoring of VFIO - devices is done in QEMU. 
- --Migration of VFIO devices currently consists of a single stop-and-copy phase. --During the stop-and-copy phase the guest is stopped and the entire VFIO device --data is transferred to the destination. -- --The pre-copy phase of migration is currently not supported for VFIO devices. --Support for VFIO pre-copy will be added later on. -+Migration of VFIO devices consists of two phases: the optional pre-copy phase, -+and the stop-and-copy phase. The pre-copy phase is iterative and allows to -+accommodate VFIO devices that have a large amount of data that needs to be -+transferred. The iterative pre-copy phase of migration allows for the guest to -+continue whilst the VFIO device state is transferred to the destination, this -+helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy -+support by reporting the VFIO_MIGRATION_PRE_COPY flag in the -+VFIO_DEVICE_FEATURE_MIGRATION ioctl. - - Note that currently VFIO migration is supported only for a single device. This - is due to VFIO migration's lack of P2P support. However, P2P support is planned -@@ -29,10 +31,20 @@ VFIO implements the device hooks for the iterative approach as follows: - * A ``load_setup`` function that sets the VFIO device on the destination in - _RESUMING state. - -+* A ``state_pending_estimate`` function that reports an estimate of the -+ remaining pre-copy data that the vendor driver has yet to save for the VFIO -+ device. -+ - * A ``state_pending_exact`` function that reads pending_bytes from the vendor - driver, which indicates the amount of data that the vendor driver has yet to - save for the VFIO device. - -+* An ``is_active_iterate`` function that indicates ``save_live_iterate`` is -+ active only when the VFIO device is in pre-copy states. -+ -+* A ``save_live_iterate`` function that reads the VFIO device's data from the -+ vendor driver during iterative pre-copy phase. -+ - * A ``save_state`` function to save the device config space if it is present. 
- - * A ``save_live_complete_precopy`` function that sets the VFIO device in -@@ -111,8 +123,10 @@ Flow of state changes during Live migration - =========================================== - - Below is the flow of state change during live migration. --The values in the brackets represent the VM state, the migration state, and -+The values in the parentheses represent the VM state, the migration state, and - the VFIO device state, respectively. -+The text in the square brackets represents the flow if the VFIO device supports -+pre-copy. - - Live migration save path - ------------------------ -@@ -124,11 +138,12 @@ Live migration save path - | - migrate_init spawns migration_thread - Migration thread then calls each device's .save_setup() -- (RUNNING, _SETUP, _RUNNING) -+ (RUNNING, _SETUP, _RUNNING [_PRE_COPY]) - | -- (RUNNING, _ACTIVE, _RUNNING) -- If device is active, get pending_bytes by .state_pending_exact() -+ (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY]) -+ If device is active, get pending_bytes by .state_pending_{estimate,exact}() - If total pending_bytes >= threshold_size, call .save_live_iterate() -+ [Data of VFIO device for pre-copy phase is copied] - Iterate till total pending bytes converge and are less than threshold - | - On migration completion, vCPU stops and calls .save_live_complete_precopy for -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 78358ede27..b73086e17a 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -492,7 +492,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - } - - if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && -- migration->device_state == VFIO_DEVICE_STATE_RUNNING) { -+ (migration->device_state == VFIO_DEVICE_STATE_RUNNING || -+ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) { - return false; - } - } -@@ -537,7 +538,8 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) - return false; - } - -- if (migration->device_state == 
VFIO_DEVICE_STATE_RUNNING) { -+ if (migration->device_state == VFIO_DEVICE_STATE_RUNNING || -+ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { - continue; - } else { - return false; -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 8d33414379..d8f6a22ae1 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -68,6 +68,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state) - return "STOP_COPY"; - case VFIO_DEVICE_STATE_RESUMING: - return "RESUMING"; -+ case VFIO_DEVICE_STATE_PRE_COPY: -+ return "PRE_COPY"; - default: - return "UNKNOWN STATE"; - } -@@ -241,6 +243,25 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, - return 0; - } - -+static int vfio_query_precopy_size(VFIOMigration *migration) -+{ -+ struct vfio_precopy_info precopy = { -+ .argsz = sizeof(precopy), -+ }; -+ -+ migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; -+ -+ if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) { -+ return -errno; -+ } -+ -+ migration->precopy_init_size = precopy.initial_bytes; -+ migration->precopy_dirty_size = precopy.dirty_bytes; -+ -+ return 0; -+} -+ - /* Returns the size of saved data on success and -errno on error */ - static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - { -@@ -249,6 +270,14 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - data_size = read(migration->data_fd, migration->data_buffer, - migration->data_buffer_size); - if (data_size < 0) { -+ /* -+ * Pre-copy emptied all the device state for now. For more information, -+ * please refer to the Linux kernel VFIO uAPI. 
-+ */ -+ if (errno == ENOMSG) { -+ return 0; -+ } -+ - return -errno; - } - if (data_size == 0) { -@@ -265,6 +294,38 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - return qemu_file_get_error(f) ?: data_size; - } - -+static void vfio_update_estimated_pending_data(VFIOMigration *migration, -+ uint64_t data_size) -+{ -+ if (!data_size) { -+ /* -+ * Pre-copy emptied all the device state for now, update estimated sizes -+ * accordingly. -+ */ -+ migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; -+ -+ return; -+ } -+ -+ if (migration->precopy_init_size) { -+ uint64_t init_size = MIN(migration->precopy_init_size, data_size); -+ -+ migration->precopy_init_size -= init_size; -+ data_size -= init_size; -+ } -+ -+ migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size, -+ data_size); -+} -+ -+static bool vfio_precopy_supported(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ -+ return migration->mig_flags & VFIO_MIGRATION_PRE_COPY; -+} -+ - /* ---------------------------------------------------------------------- */ - - static int vfio_save_setup(QEMUFile *f, void *opaque) -@@ -285,6 +346,28 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) - return -ENOMEM; - } - -+ if (vfio_precopy_supported(vbasedev)) { -+ int ret; -+ -+ switch (migration->device_state) { -+ case VFIO_DEVICE_STATE_RUNNING: -+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY, -+ VFIO_DEVICE_STATE_RUNNING); -+ if (ret) { -+ return ret; -+ } -+ -+ vfio_query_precopy_size(migration); -+ -+ break; -+ case VFIO_DEVICE_STATE_STOP: -+ /* vfio_save_complete_precopy() will go to STOP_COPY */ -+ break; -+ default: -+ return -EINVAL; -+ } -+ } -+ - trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -@@ -299,26 +382,42 @@ static void vfio_save_cleanup(void *opaque) - - g_free(migration->data_buffer); - migration->data_buffer = NULL; -+ 
migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; - vfio_migration_cleanup(vbasedev); - trace_vfio_save_cleanup(vbasedev->name); - } - -+static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy, -+ uint64_t *can_postcopy) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ -+ if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) { -+ return; -+ } -+ -+ *must_precopy += -+ migration->precopy_init_size + migration->precopy_dirty_size; -+ -+ trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy, -+ *can_postcopy, -+ migration->precopy_init_size, -+ migration->precopy_dirty_size); -+} -+ - /* - * Migration size of VFIO devices can be as little as a few KBs or as big as - * many GBs. This value should be big enough to cover the worst case. - */ - #define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) - --/* -- * Only exact function is implemented and not estimate function. The reason is -- * that during pre-copy phase of migration the estimate function is called -- * repeatedly while pending RAM size is over the threshold, thus migration -- * can't converge and querying the VFIO device pending data size is useless. 
-- */ - static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - uint64_t *can_postcopy) - { - VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; - uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; - - /* -@@ -328,8 +427,48 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - vfio_query_stop_copy_size(vbasedev, &stop_copy_size); - *must_precopy += stop_copy_size; - -+ if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { -+ vfio_query_precopy_size(migration); -+ -+ *must_precopy += -+ migration->precopy_init_size + migration->precopy_dirty_size; -+ } -+ - trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, -- stop_copy_size); -+ stop_copy_size, migration->precopy_init_size, -+ migration->precopy_dirty_size); -+} -+ -+static bool vfio_is_active_iterate(void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ -+ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; -+} -+ -+static int vfio_save_iterate(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ ssize_t data_size; -+ -+ data_size = vfio_save_block(f, migration); -+ if (data_size < 0) { -+ return data_size; -+ } -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ -+ vfio_update_estimated_pending_data(migration, data_size); -+ -+ trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, -+ migration->precopy_dirty_size); -+ -+ /* -+ * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero. -+ * Return 1 so following handlers will not be potentially blocked. 
-+ */ -+ return 1; - } - - static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) -@@ -338,7 +477,7 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - ssize_t data_size; - int ret; - -- /* We reach here with device state STOP only */ -+ /* We reach here with device state STOP or STOP_COPY only */ - ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, - VFIO_DEVICE_STATE_STOP); - if (ret) { -@@ -457,7 +596,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - static const SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, -+ .state_pending_estimate = vfio_state_pending_estimate, - .state_pending_exact = vfio_state_pending_exact, -+ .is_active_iterate = vfio_is_active_iterate, -+ .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_save_complete_precopy, - .save_state = vfio_save_state, - .load_setup = vfio_load_setup, -@@ -470,13 +612,18 @@ static const SaveVMHandlers savevm_vfio_handlers = { - static void vfio_vmstate_change(void *opaque, bool running, RunState state) - { - VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; - enum vfio_device_mig_state new_state; - int ret; - - if (running) { - new_state = VFIO_DEVICE_STATE_RUNNING; - } else { -- new_state = VFIO_DEVICE_STATE_STOP; -+ new_state = -+ (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY && -+ (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ? 
-+ VFIO_DEVICE_STATE_STOP_COPY : -+ VFIO_DEVICE_STATE_STOP; - } - - /* -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 646e42fd27..4150b59e58 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -162,6 +162,8 @@ vfio_save_block(const char *name, int data_size) " (%s) data_size %d" - vfio_save_cleanup(const char *name) " (%s)" - vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" - vfio_save_device_config_state(const char *name) " (%s)" -+vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 - vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 --vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 -+vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 -+vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 - vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 5f29dab839..1db901c194 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -67,6 +67,8 @@ typedef struct VFIOMigration { - void *data_buffer; - size_t data_buffer_size; - uint64_t mig_flags; -+ uint64_t precopy_init_size; -+ uint64_t precopy_dirty_size; - } 
VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch b/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch deleted file mode 100644 index d87680d..0000000 --- a/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 169dc1bb051b3aebc571936d956b49ba0621ae43 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 16/37] vfio/migration: Add support for switchover ack - capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [14/28] b3bd2eb2d0ca49ff05a0a82ae5bb956a354aed47 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 745c42912a04 -Author: Avihai Horon -Date: Wed Jun 21 14:12:01 2023 +0300 - - vfio/migration: Add support for switchover ack capability - - Loading of a VFIO device's data can take a substantial amount of time as - the device may need to allocate resources, prepare internal data - structures, etc. This can increase migration downtime, especially for - VFIO devices with a lot of resources. - - To solve this, VFIO migration uAPI defines "initial bytes" as part of - its precopy data stream. Initial bytes can be used in various ways to - improve VFIO migration performance. For example, it can be used to - transfer device metadata to pre-allocate resources in the destination. - However, for this to work we need to make sure that all initial bytes - are sent and loaded in the destination before the source VM is stopped. 
- - Use migration switchover ack capability to make sure a VFIO device's - initial bytes are sent and loaded in the destination before the source - stops the VM and attempts to complete the migration. - This can significantly reduce migration downtime for some devices. - - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - docs/devel/vfio-migration.rst | 10 +++++++++ - hw/vfio/migration.c | 39 ++++++++++++++++++++++++++++++++++- - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 49 insertions(+), 1 deletion(-) - -diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst -index e896b2a673..b433cb5bb2 100644 ---- a/docs/devel/vfio-migration.rst -+++ b/docs/devel/vfio-migration.rst -@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy - support by reporting the VFIO_MIGRATION_PRE_COPY flag in the - VFIO_DEVICE_FEATURE_MIGRATION ioctl. - -+When pre-copy is supported, it's possible to further reduce downtime by -+enabling "switchover-ack" migration capability. -+VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream -+and recommends that the initial bytes are sent and loaded in the destination -+before stopping the source VM. Enabling this migration capability will -+guarantee that and thus, can potentially reduce downtime even further. -+ - Note that currently VFIO migration is supported only for a single device. This - is due to VFIO migration's lack of P2P support. However, P2P support is planned - to be added later on. -@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach as follows: - * A ``save_live_iterate`` function that reads the VFIO device's data from the - vendor driver during iterative pre-copy phase. 
- -+* A ``switchover_ack_needed`` function that checks if the VFIO device uses -+ "switchover-ack" migration capability when this capability is enabled. -+ - * A ``save_state`` function to save the device config space if it is present. - - * A ``save_live_complete_precopy`` function that sets the VFIO device in -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index d8f6a22ae1..acbf0bb7ab 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -18,6 +18,8 @@ - #include "sysemu/runstate.h" - #include "hw/vfio/vfio-common.h" - #include "migration/migration.h" -+#include "migration/options.h" -+#include "migration/savevm.h" - #include "migration/vmstate.h" - #include "migration/qemu-file.h" - #include "migration/register.h" -@@ -45,6 +47,7 @@ - #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) - #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) - #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) -+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) - - /* - * This is an arbitrary size based on migration of mlx5 devices, where typically -@@ -384,6 +387,7 @@ static void vfio_save_cleanup(void *opaque) - migration->data_buffer = NULL; - migration->precopy_init_size = 0; - migration->precopy_dirty_size = 0; -+ migration->initial_data_sent = false; - vfio_migration_cleanup(vbasedev); - trace_vfio_save_cleanup(vbasedev->name); - } -@@ -457,10 +461,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) - if (data_size < 0) { - return data_size; - } -- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - vfio_update_estimated_pending_data(migration, data_size); - -+ if (migrate_switchover_ack() && !migration->precopy_init_size && -+ !migration->initial_data_sent) { -+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT); -+ migration->initial_data_sent = true; -+ } else { -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ } -+ - trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, - 
migration->precopy_dirty_size); - -@@ -579,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - } - break; - } -+ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT: -+ { -+ if (!vfio_precopy_supported(vbasedev) || -+ !migrate_switchover_ack()) { -+ error_report("%s: Received INIT_DATA_SENT but switchover ack " -+ "is not used", vbasedev->name); -+ return -EINVAL; -+ } -+ -+ ret = qemu_loadvm_approve_switchover(); -+ if (ret) { -+ error_report( -+ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)", -+ vbasedev->name, ret, strerror(-ret)); -+ } -+ -+ return ret; -+ } - default: - error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); - return -EINVAL; -@@ -593,6 +622,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - return ret; - } - -+static bool vfio_switchover_ack_needed(void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ -+ return vfio_precopy_supported(vbasedev); -+} -+ - static const SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, -@@ -605,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { - .load_setup = vfio_load_setup, - .load_cleanup = vfio_load_cleanup, - .load_state = vfio_load_state, -+ .switchover_ack_needed = vfio_switchover_ack_needed, - }; - - /* ---------------------------------------------------------------------- */ -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 1db901c194..3dc5f2104c 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -69,6 +69,7 @@ typedef struct VFIOMigration { - uint64_t mig_flags; - uint64_t precopy_init_size; - uint64_t precopy_dirty_size; -+ bool initial_data_sent; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch b/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch deleted file mode 100644 
index dde2e24..0000000 --- a/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 35c7d0d3b02d61d6f29afae74bd83edd70a6a1b4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 26/37] vfio/migration: Change vIOMMU blocker from global to - per device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [24/28] 8fda1c82a81fadd4f38e6a5e878c9228a81c0f6e (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 3c26c80a0a26 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:07 2023 +0800 - - vfio/migration: Change vIOMMU blocker from global to per device - - Contrary to multiple device blocker which needs to consider already-attached - devices to unblock/block dynamically, the vIOMMU migration blocker is a device - specific config. Meaning it only needs to know whether the device is bypassing - or not the vIOMMU (via machine property, or per pxb-pcie::bypass_iommu), and - does not need the state of currently present devices. For this reason, the - vIOMMU global migration blocker can be consolidated into the per-device - migration blocker, allowing us to remove some unnecessary code. - - This change also makes vfio_mig_active() more accurate as it doesn't check for - global blocker. 
- - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 51 ++--------------------------------- - hw/vfio/migration.c | 7 ++--- - hw/vfio/pci.c | 1 - - include/hw/vfio/vfio-common.h | 3 +-- - 4 files changed, 7 insertions(+), 55 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 136d8243d6..e815f6ba30 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -362,7 +362,6 @@ bool vfio_mig_active(void) - } - - static Error *multiple_devices_migration_blocker; --static Error *giommu_migration_blocker; - - static unsigned int vfio_migratable_device_num(void) - { -@@ -420,55 +419,9 @@ void vfio_unblock_multiple_devices_migration(void) - multiple_devices_migration_blocker = NULL; - } - --static bool vfio_viommu_preset(void) -+bool vfio_viommu_preset(VFIODevice *vbasedev) - { -- VFIOAddressSpace *space; -- -- QLIST_FOREACH(space, &vfio_address_spaces, list) { -- if (space->as != &address_space_memory) { -- return true; -- } -- } -- -- return false; --} -- --int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) --{ -- int ret; -- -- if (giommu_migration_blocker || -- !vfio_viommu_preset()) { -- return 0; -- } -- -- if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -- error_setg(errp, -- "Migration is currently not supported with vIOMMU enabled"); -- return -EINVAL; -- } -- -- error_setg(&giommu_migration_blocker, -- "Migration is currently not supported with vIOMMU enabled"); -- ret = migrate_add_blocker(giommu_migration_blocker, errp); -- if (ret < 0) { -- error_free(giommu_migration_blocker); -- giommu_migration_blocker = NULL; -- } -- -- return ret; --} -- --void vfio_migration_finalize(void) --{ -- if (!giommu_migration_blocker || -- vfio_viommu_preset()) { -- return; -- } -- -- migrate_del_blocker(giommu_migration_blocker); -- error_free(giommu_migration_blocker); -- giommu_migration_blocker = NULL; 
-+ return vbasedev->group->container->space->as != &address_space_memory; - } - - static void vfio_set_migration_error(int err) -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 1db7d52ab2..e6e5e85f75 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -878,9 +878,10 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - return ret; - } - -- ret = vfio_block_giommu_migration(vbasedev, errp); -- if (ret) { -- return ret; -+ if (vfio_viommu_preset(vbasedev)) { -+ error_setg(&err, "%s: Migration is currently not supported " -+ "with vIOMMU enabled", vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); - } - - trace_vfio_migration_realize(vbasedev->name); -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2d059832a4..922c81872c 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3279,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj) - */ - vfio_put_device(vdev); - vfio_put_group(group); -- vfio_migration_finalize(); - } - - static void vfio_exitfn(PCIDevice *pdev) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 93429b9abb..45167c8a8a 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -227,7 +227,7 @@ extern VFIOGroupList vfio_group_list; - bool vfio_mig_active(void); - int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); - void vfio_unblock_multiple_devices_migration(void); --int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); -+bool vfio_viommu_preset(VFIODevice *vbasedev); - int64_t vfio_mig_bytes_transferred(void); - void vfio_reset_bytes_transferred(void); - -@@ -254,6 +254,5 @@ int vfio_spapr_remove_window(VFIOContainer *container, - - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); --void vfio_migration_finalize(void); - - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git 
a/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch b/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch deleted file mode 100644 index 9deaf1a..0000000 --- a/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch +++ /dev/null @@ -1,145 +0,0 @@ -From a36fa46369fe9bf2a2174e9ed6ab83042e904066 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 27/37] vfio/migration: Free resources when - vfio_migration_realize fails -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [25/28] b3ab8d3443d4bc12a689dc7d88a94da315814bb7 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 2b43b2995b02 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:08 2023 +0800 - - vfio/migration: Free resources when vfio_migration_realize fails - - When vfio_realize() succeeds, hot unplug will call vfio_exitfn() - to free resources allocated in vfio_realize(); when vfio_realize() - fails, vfio_exitfn() is never called and we need to free resources - in vfio_realize(). - - In the case that vfio_migration_realize() fails, - e.g: with -only-migratable & enable-migration=off, we see below: - - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off - 0000:81:11.1: Migration disabled - Error: disallowing migration blocker (--only-migratable) for: 0000:81:11.1: Migration is disabled for VFIO device - - If we hotplug again we should see same log as above, but we see: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off - Error: vfio 0000:81:11.1: device is already attached - - That's because some references to VFIO device isn't released. 
- For resources allocated in vfio_migration_realize(), free them by - jumping to out_deinit path with calling a new function - vfio_migration_deinit(). For resources allocated in vfio_realize(), - free them by jumping to de-register path in vfio_realize(). - - Signed-off-by: Zhenzhong Duan - Fixes: a22651053b59 ("vfio: Make vfio-pci device migration capable") - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 33 +++++++++++++++++++++++---------- - hw/vfio/pci.c | 1 + - 2 files changed, 24 insertions(+), 10 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index e6e5e85f75..e3954570c8 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev) - return 0; - } - -+static void vfio_migration_deinit(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ -+ remove_migration_state_change_notifier(&migration->migration_state); -+ qemu_del_vm_change_state_handler(migration->vm_state); -+ unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); -+ vfio_migration_free(vbasedev); -+ vfio_unblock_multiple_devices_migration(); -+} -+ - static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) - { - int ret; -@@ -866,7 +877,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - error_setg(&err, - "%s: VFIO device doesn't support device dirty tracking", - vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ goto add_blocker; - } - - warn_report("%s: VFIO device doesn't support device dirty tracking", -@@ -875,29 +886,31 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - - ret = vfio_block_multiple_devices_migration(vbasedev, errp); - if (ret) { -- return ret; -+ goto out_deinit; - } - - if (vfio_viommu_preset(vbasedev)) { - error_setg(&err, "%s: Migration is currently not 
supported " - "with vIOMMU enabled", vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ goto add_blocker; - } - - trace_vfio_migration_realize(vbasedev->name); - return 0; -+ -+add_blocker: -+ ret = vfio_block_migration(vbasedev, err, errp); -+out_deinit: -+ if (ret) { -+ vfio_migration_deinit(vbasedev); -+ } -+ return ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) - { - if (vbasedev->migration) { -- VFIOMigration *migration = vbasedev->migration; -- -- remove_migration_state_change_notifier(&migration->migration_state); -- qemu_del_vm_change_state_handler(migration->vm_state); -- unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); -- vfio_migration_free(vbasedev); -- vfio_unblock_multiple_devices_migration(); -+ vfio_migration_deinit(vbasedev); - } - - if (vbasedev->migration_blocker) { -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 922c81872c..037b7d4176 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3234,6 +3234,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - ret = vfio_migration_realize(vbasedev, errp); - if (ret) { - error_report("%s: Migration disabled", vbasedev->name); -+ goto out_deregister; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch b/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch deleted file mode 100644 index 3258541..0000000 --- a/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch +++ /dev/null @@ -1,283 +0,0 @@ -From 747c34c0a3b8048ebdab387d22f2b922c81d572a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 21/37] vfio/migration: Make VFIO migration non-experimental -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: 
Miroslav Rezanina -RH-Commit: [19/28] 2f457c1c0de95a3fced0270f2edbbc5193cc4de9 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8bbcb64a71d8 -Author: Avihai Horon -Date: Wed Jun 28 10:31:12 2023 +0300 - - vfio/migration: Make VFIO migration non-experimental - - The major parts of VFIO migration are supported today in QEMU. This - includes basic VFIO migration, device dirty page tracking and precopy - support. - - Thus, at this point in time, it seems appropriate to make VFIO migration - non-experimental: remove the x prefix from enable_migration property, - change it to ON_OFF_AUTO and let the default value be AUTO. - - In addition, make the following adjustments: - 1. When enable_migration is ON and migration is not supported, fail VFIO - device realization. - 2. When enable_migration is AUTO (i.e., not explicitly enabled), require - device dirty tracking support. This is because device dirty tracking - is currently the only method to do dirty page tracking, which is - essential for migrating in a reasonable downtime. Setting - enable_migration to ON will not require device dirty tracking. - 3. Make migration error and blocker messages more elaborate. - 4. Remove error prints in vfio_migration_query_flags(). - 5. Rename trace_vfio_migration_probe() to - trace_vfio_migration_realize(). 
- - Signed-off-by: Avihai Horon - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 16 ++++++- - hw/vfio/migration.c | 79 +++++++++++++++++++++++------------ - hw/vfio/pci.c | 4 +- - hw/vfio/trace-events | 2 +- - include/hw/vfio/vfio-common.h | 6 +-- - 5 files changed, 73 insertions(+), 34 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 3b4ac53f15..136d8243d6 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -381,7 +381,7 @@ static unsigned int vfio_migratable_device_num(void) - return device_num; - } - --int vfio_block_multiple_devices_migration(Error **errp) -+int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) - { - int ret; - -@@ -390,6 +390,12 @@ int vfio_block_multiple_devices_migration(Error **errp) - return 0; - } - -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_setg(errp, "Migration is currently not supported with multiple " -+ "VFIO devices"); -+ return -EINVAL; -+ } -+ - error_setg(&multiple_devices_migration_blocker, - "Migration is currently not supported with multiple " - "VFIO devices"); -@@ -427,7 +433,7 @@ static bool vfio_viommu_preset(void) - return false; - } - --int vfio_block_giommu_migration(Error **errp) -+int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) - { - int ret; - -@@ -436,6 +442,12 @@ int vfio_block_giommu_migration(Error **errp) - return 0; - } - -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_setg(errp, -+ "Migration is currently not supported with vIOMMU enabled"); -+ return -EINVAL; -+ } -+ - error_setg(&giommu_migration_blocker, - "Migration is currently not supported with vIOMMU enabled"); - ret = migrate_add_blocker(giommu_migration_blocker, errp); -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 7cf143926c..1db7d52ab2 100644 ---- a/hw/vfio/migration.c -+++ 
b/hw/vfio/migration.c -@@ -724,14 +724,6 @@ static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) - feature->argsz = sizeof(buf); - feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { -- if (errno == ENOTTY) { -- error_report("%s: VFIO migration is not supported in kernel", -- vbasedev->name); -- } else { -- error_report("%s: Failed to query VFIO migration support, err: %s", -- vbasedev->name, strerror(errno)); -- } -- - return -errno; - } - -@@ -810,6 +802,27 @@ static int vfio_migration_init(VFIODevice *vbasedev) - return 0; - } - -+static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) -+{ -+ int ret; -+ -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_propagate(errp, err); -+ return -EINVAL; -+ } -+ -+ vbasedev->migration_blocker = error_copy(err); -+ error_free(err); -+ -+ ret = migrate_add_blocker(vbasedev->migration_blocker, errp); -+ if (ret < 0) { -+ error_free(vbasedev->migration_blocker); -+ vbasedev->migration_blocker = NULL; -+ } -+ -+ return ret; -+} -+ - /* ---------------------------------------------------------------------- */ - - int64_t vfio_mig_bytes_transferred(void) -@@ -824,40 +837,54 @@ void vfio_reset_bytes_transferred(void) - - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { -- int ret = -ENOTSUP; -+ Error *err = NULL; -+ int ret; - -- if (!vbasedev->enable_migration) { -- goto add_blocker; -+ if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { -+ error_setg(&err, "%s: Migration is disabled for VFIO device", -+ vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); - } - - ret = vfio_migration_init(vbasedev); - if (ret) { -- goto add_blocker; -+ if (ret == -ENOTTY) { -+ error_setg(&err, "%s: VFIO migration is not supported in kernel", -+ vbasedev->name); -+ } else { -+ error_setg(&err, -+ "%s: Migration couldn't be initialized for VFIO device, " -+ "err: %d 
(%s)", -+ vbasedev->name, ret, strerror(-ret)); -+ } -+ -+ return vfio_block_migration(vbasedev, err, errp); -+ } -+ -+ if (!vbasedev->dirty_pages_supported) { -+ if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { -+ error_setg(&err, -+ "%s: VFIO device doesn't support device dirty tracking", -+ vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); -+ } -+ -+ warn_report("%s: VFIO device doesn't support device dirty tracking", -+ vbasedev->name); - } - -- ret = vfio_block_multiple_devices_migration(errp); -+ ret = vfio_block_multiple_devices_migration(vbasedev, errp); - if (ret) { - return ret; - } - -- ret = vfio_block_giommu_migration(errp); -+ ret = vfio_block_giommu_migration(vbasedev, errp); - if (ret) { - return ret; - } - -- trace_vfio_migration_probe(vbasedev->name); -+ trace_vfio_migration_realize(vbasedev->name); - return 0; -- --add_blocker: -- error_setg(&vbasedev->migration_blocker, -- "VFIO device doesn't support migration"); -- -- ret = migrate_add_blocker(vbasedev->migration_blocker, errp); -- if (ret < 0) { -- error_free(vbasedev->migration_blocker); -- vbasedev->migration_blocker = NULL; -- } -- return ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 15e7554954..6634945a70 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3371,8 +3371,8 @@ static Property vfio_pci_dev_properties[] = { - VFIO_FEATURE_ENABLE_REQ_BIT, true), - DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, - VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), -- DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, -- vbasedev.enable_migration, false), -+ DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, -+ vbasedev.enable_migration, ON_OFF_AUTO_AUTO), - DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), - DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, - vbasedev.ram_block_discard_allowed, false), -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events 
-index 4150b59e58..0391bd583b 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -155,7 +155,7 @@ vfio_load_cleanup(const char *name) " (%s)" - vfio_load_device_config_state(const char *name) " (%s)" - vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 - vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" --vfio_migration_probe(const char *name) " (%s)" -+vfio_migration_realize(const char *name) " (%s)" - vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" - vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" - vfio_save_block(const char *name, int data_size) " (%s) data_size %d" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 1d19c6f251..93429b9abb 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -139,7 +139,7 @@ typedef struct VFIODevice { - bool needs_reset; - bool no_mmap; - bool ram_block_discard_allowed; -- bool enable_migration; -+ OnOffAuto enable_migration; - VFIODeviceOps *ops; - unsigned int num_irqs; - unsigned int num_regions; -@@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - extern VFIOGroupList vfio_group_list; - - bool vfio_mig_active(void); --int vfio_block_multiple_devices_migration(Error **errp); -+int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); - void vfio_unblock_multiple_devices_migration(void); --int vfio_block_giommu_migration(Error **errp); -+int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); - int64_t vfio_mig_bytes_transferred(void); - void vfio_reset_bytes_transferred(void); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch b/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch deleted file mode 100644 index 3b61c5d..0000000 --- 
a/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch +++ /dev/null @@ -1,102 +0,0 @@ -From edcf24a08d66d620a10c746824e31d230c8516ce Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 13/37] vfio/migration: Refactor vfio_save_block() to return - saved data size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/28] b4aed6ddcbde159e98275a0675dcdf45d644673b (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit cf53efbbda2e -Author: Avihai Horon -Date: Wed Jun 21 14:11:58 2023 +0300 - - vfio/migration: Refactor vfio_save_block() to return saved data size - - Refactor vfio_save_block() to return the size of saved data on success - and -errno on error. - - This will be used in next patch to implement VFIO migration pre-copy - support. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Reviewed-by: Juan Quintela - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 17 +++++++++-------- - 1 file changed, 9 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 6b58dddb88..235978fd68 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -241,8 +241,8 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, - return 0; - } - --/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */ --static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) -+/* Returns the size of saved data on success and -errno on error */ -+static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - { - ssize_t data_size; - -@@ -252,7 +252,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) - return -errno; - } - if (data_size == 0) { -- return 1; -+ return 0; - } - - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); -@@ -262,7 +262,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) - - trace_vfio_save_block(migration->vbasedev->name, data_size); - -- return qemu_file_get_error(f); -+ return qemu_file_get_error(f) ?: data_size; - } - - /* ---------------------------------------------------------------------- */ -@@ -335,6 +335,7 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - { - VFIODevice *vbasedev = opaque; -+ ssize_t data_size; - int ret; - - /* We reach here with device state STOP only */ -@@ -345,11 +346,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - } - - do { -- ret = vfio_save_block(f, vbasedev->migration); -- if (ret < 0) { -- return ret; -+ data_size = vfio_save_block(f, vbasedev->migration); -+ if (data_size < 0) { -+ return data_size; - } -- } 
while (!ret); -+ } while (data_size); - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - ret = qemu_file_get_error(f); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch b/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch deleted file mode 100644 index ad3c6ca..0000000 --- a/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 5bb94c4eaeb94f0b41a57660098a4c12a295b725 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 28/37] vfio/migration: Remove print of "Migration disabled" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [26/28] c7ff1f9c90b4cfcb327ef474042ea71ea577a94d (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0520d63c7701 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:09 2023 +0800 - - vfio/migration: Remove print of "Migration disabled" - - Property enable_migration supports [on/off/auto]. - In ON mode, error pointer is passed to errp and logged. - In OFF mode, we doesn't need to log "Migration disabled" as it's intentional. - In AUTO mode, we should only ever see errors or warnings if the device - supports migration and an error or incompatibility occurs while further - probing or configuring it. Lack of support for migration shoundn't - generate an error or warning. 
- - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 037b7d4176..a60b868c38 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3233,7 +3233,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - if (!pdev->failover_pair_id) { - ret = vfio_migration_realize(vbasedev, errp); - if (ret) { -- error_report("%s: Migration disabled", vbasedev->name); - goto out_deregister; - } - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch b/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch deleted file mode 100644 index 2666460..0000000 --- a/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch +++ /dev/null @@ -1,165 +0,0 @@ -From a63b4010ba4f491c9144afff363bebcf35ecf496 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 20/37] vfio/migration: Reset bytes_transferred properly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [18/28] e9a70faeca4fd5aa7ef36502cf76bf0b62f65057 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 808642a2f640 -Author: Avihai Horon -Date: Wed Jun 28 10:31:11 2023 +0300 - - vfio/migration: Reset bytes_transferred properly - - Currently, VFIO bytes_transferred is not reset properly: - 1. bytes_transferred is not reset after a VM snapshot (so a migration - following a snapshot will report incorrect value). - 2. 
bytes_transferred is a single counter for all VFIO devices, however - upon migration failure it is reset multiple times, by each VFIO - device. - - Fix it by introducing a new function vfio_reset_bytes_transferred() and - calling it during migration and snapshot start. - - Remove existing bytes_transferred reset in VFIO migration state - notifier, which is not needed anymore. - - Fixes: 3710586caa5d ("qapi: Add VFIO devices migration stats in Migration stats") - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c - migration/savevm.c - context changes due to commit aff3f6606d14 ("migration: Rename - ram_counters to mig_stats") - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 6 +++++- - include/hw/vfio/vfio-common.h | 1 + - migration/migration.c | 1 + - migration/migration.h | 1 + - migration/savevm.c | 1 + - migration/target.c | 17 +++++++++++++++-- - 6 files changed, 24 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index acbf0bb7ab..7cf143926c 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -697,7 +697,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) - case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_CANCELLED: - case MIGRATION_STATUS_FAILED: -- bytes_transferred = 0; - /* - * If setting the device in RUNNING state fails, the device should - * be reset. To do so, use ERROR state as a recover state. 
-@@ -818,6 +817,11 @@ int64_t vfio_mig_bytes_transferred(void) - return bytes_transferred; - } - -+void vfio_reset_bytes_transferred(void) -+{ -+ bytes_transferred = 0; -+} -+ - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { - int ret = -ENOTSUP; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 6d1b8487c3..1d19c6f251 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -229,6 +229,7 @@ int vfio_block_multiple_devices_migration(Error **errp); - void vfio_unblock_multiple_devices_migration(void); - int vfio_block_giommu_migration(Error **errp); - int64_t vfio_mig_bytes_transferred(void); -+void vfio_reset_bytes_transferred(void); - - #ifdef CONFIG_LINUX - int vfio_get_region_info(VFIODevice *vbasedev, int index, -diff --git a/migration/migration.c b/migration/migration.c -index 9bf1caee6c..47ad6c43cb 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1638,6 +1638,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - */ - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, sizeof(compression_counters)); -+ reset_vfio_bytes_transferred(); - - return true; - } -diff --git a/migration/migration.h b/migration/migration.h -index e9679f8029..7ccf460aa2 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -495,6 +495,7 @@ bool migration_rate_limit(void); - void migration_cancel(const Error *error); - - void populate_vfio_info(MigrationInfo *info); -+void reset_vfio_bytes_transferred(void); - void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); - - #endif -diff --git a/migration/savevm.c b/migration/savevm.c -index aff70e6263..83088fc3f8 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1620,6 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - migrate_init(ms); - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, 
sizeof(compression_counters)); -+ reset_vfio_bytes_transferred(); - ms->to_dst_file = f; - - qemu_mutex_unlock_iothread(); -diff --git a/migration/target.c b/migration/target.c -index 00ca007f97..f39c9a8d88 100644 ---- a/migration/target.c -+++ b/migration/target.c -@@ -14,12 +14,25 @@ - #include "hw/vfio/vfio-common.h" - #endif - -+#ifdef CONFIG_VFIO - void populate_vfio_info(MigrationInfo *info) - { --#ifdef CONFIG_VFIO - if (vfio_mig_active()) { - info->vfio = g_malloc0(sizeof(*info->vfio)); - info->vfio->transferred = vfio_mig_bytes_transferred(); - } --#endif - } -+ -+void reset_vfio_bytes_transferred(void) -+{ -+ vfio_reset_bytes_transferred(); -+} -+#else -+void populate_vfio_info(MigrationInfo *info) -+{ -+} -+ -+void reset_vfio_bytes_transferred(void) -+{ -+} -+#endif --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch b/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch deleted file mode 100644 index efd42a9..0000000 --- a/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 223eef8363c9ba58514b2d4f93e5ff015d111ff2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 29/37] vfio/migration: Return bool type for - vfio_migration_realize() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [27/28] d5aea3ea4c53e4573076cbacbbe3134f9f0f9e53 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit d4a2af747d5a -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:10 2023 +0800 - - vfio/migration: Return bool type for vfio_migration_realize() - - Make vfio_migration_realize() adhere to the convention of other realize() - callbacks(like 
qdev_realize) by returning bool instead of int. - - Suggested-by: Cédric Le Goater - Suggested-by: Joao Martins - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 15 ++++++++++----- - hw/vfio/pci.c | 3 +-- - include/hw/vfio/vfio-common.h | 2 +- - 3 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index e3954570c8..2674f4bc47 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -846,7 +846,12 @@ void vfio_reset_bytes_transferred(void) - bytes_transferred = 0; - } - --int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) -+/* -+ * Return true when either migration initialized or blocker registered. -+ * Currently only return false when adding blocker fails which will -+ * de-register vfio device. -+ */ -+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { - Error *err = NULL; - int ret; -@@ -854,7 +859,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { - error_setg(&err, "%s: Migration is disabled for VFIO device", - vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ return !vfio_block_migration(vbasedev, err, errp); - } - - ret = vfio_migration_init(vbasedev); -@@ -869,7 +874,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - vbasedev->name, ret, strerror(-ret)); - } - -- return vfio_block_migration(vbasedev, err, errp); -+ return !vfio_block_migration(vbasedev, err, errp); - } - - if (!vbasedev->dirty_pages_supported) { -@@ -896,7 +901,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - } - - trace_vfio_migration_realize(vbasedev->name); -- return 0; -+ return true; - - add_blocker: - ret = vfio_block_migration(vbasedev, err, errp); -@@ -904,7 +909,7 @@ out_deinit: - if (ret) { - 
vfio_migration_deinit(vbasedev); - } -- return ret; -+ return !ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index a60b868c38..ba40ca8784 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3231,8 +3231,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - } - - if (!pdev->failover_pair_id) { -- ret = vfio_migration_realize(vbasedev, errp); -- if (ret) { -+ if (!vfio_migration_realize(vbasedev, errp)) { - goto out_deregister; - } - } -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 45167c8a8a..da43d27352 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -252,7 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container, - int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space); - --int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); -+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); - - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch b/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch deleted file mode 100644 index 6211db7..0000000 --- a/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 76208f7824d5139ac8d86140b0e01031b67638cc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 04/37] vfio/migration: Skip log_sync during migration SETUP - state -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/28] 4c340992b472ac4627b57705f4e971f14bbb0846 (clegoate/qemu-kvm-c9s) - 
-Bugzilla: https://bugzilla.redhat.com/2192818 - -commit ff180c6bd7a8 -Author: Avihai Horon -Date: Mon Apr 3 16:00:00 2023 +0300 - - vfio/migration: Skip log_sync during migration SETUP state - - Currently, VFIO log_sync can be issued while migration is in SETUP - state. However, doing this log_sync is at best redundant and at worst - can fail. - - Redundant -- all RAM is marked dirty in migration SETUP state and is - transferred only after migration is set to ACTIVE state, so doing - log_sync during migration SETUP is pointless. - - Can fail -- there is a time window, between setting migration state to - SETUP and starting dirty tracking by RAM save_live_setup handler, during - which dirty tracking is still not started. Any VFIO log_sync call that - is issued during this time window will fail. For example, this error can - be triggered by migrating a VM when a GUI is active, which constantly - calls log_sync. - - Fix it by skipping VFIO log_sync while migration is in SETUP state. - - Fixes: 758b96b61d5c ("vfio/migrate: Move switch of dirty tracking into vfio_memory_listener") - Signed-off-by: Avihai Horon - Link: https://lore.kernel.org/r/20230403130000.6422-1-avihaih@nvidia.com - Signed-off-by: Alex Williamson - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 4d01ea3515..78358ede27 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -478,7 +478,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - -- if (!migration_is_setup_or_active(ms->state)) { -+ if (ms->state != MIGRATION_STATUS_ACTIVE && -+ ms->state != MIGRATION_STATUS_DEVICE) { - return false; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch b/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch deleted file mode 
100644 index 2db8511..0000000 --- a/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 77353cdafd08562dff9c99e9f3984d12224bee52 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 14/37] vfio/migration: Store VFIO migration flags in - VFIOMigration -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/28] 31a9c39e6ee6338a35dc08c3e7f5c1a204166249 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 6cd1fe11598a -Author: Avihai Horon -Date: Wed Jun 21 14:11:59 2023 +0300 - - vfio/migration: Store VFIO migration flags in VFIOMigration - - VFIO migration flags are queried once in vfio_migration_init(). Store - them in VFIOMigration so they can be used later to check the device's - migration capabilities without re-querying them. - - This will be used in the next patch to check if the device supports - precopy migration. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 1 + - include/hw/vfio/vfio-common.h | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 235978fd68..8d33414379 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -603,6 +603,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) - migration->vbasedev = vbasedev; - migration->device_state = VFIO_DEVICE_STATE_RUNNING; - migration->data_fd = -1; -+ migration->mig_flags = mig_flags; - - vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev); - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index eed244f25f..5f29dab839 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -66,6 +66,7 @@ typedef struct VFIOMigration { - int data_fd; - void *data_buffer; - size_t data_buffer_size; -+ uint64_t mig_flags; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch b/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch deleted file mode 100644 index b5d9d37..0000000 --- a/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch +++ /dev/null @@ -1,67 +0,0 @@ -From b5a69101abac153c9c9be7f539d810e3e4af3bdf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 19/37] vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI - retry path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [17/28] 
2067bb58f3a2c1a793e5566cee3c78a8299c9c1c (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit c17408892319 -Author: Shameer Kolothum -Date: Tue Jun 13 15:09:43 2023 +0100 - - vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI retry path - - When vfio_enable_vectors() returns with less than requested nr_vectors - we retry with what kernel reported back. But the retry path doesn't - call vfio_prepare_kvm_msi_virq_batch() and this results in, - - qemu-system-aarch64: vfio: Error: Failed to enable 4 MSI vectors, retry with 1 - qemu-system-aarch64: ../hw/vfio/pci.c:602: vfio_commit_kvm_msi_virq_batch: Assertion `vdev->defer_kvm_irq_routing' failed - - Fixes: dc580d51f7dd ("vfio: defer to commit kvm irq routing when enable msi/msix") - Reviewed-by: Longpeng - Signed-off-by: Shameer Kolothum - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 7c5e2b5996..15e7554954 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -666,6 +666,8 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) - - vfio_disable_interrupts(vdev); - -+ vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); -+retry: - /* - * Setting vector notifiers needs to enable route for each vector. 
- * Deferring to commit the KVM routes once rather than per vector -@@ -673,8 +675,6 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) - */ - vfio_prepare_kvm_msi_virq_batch(vdev); - -- vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); --retry: - vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); - - for (i = 0; i < vdev->nr_vectors; i++) { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch b/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch deleted file mode 100644 index 0aca4ef..0000000 --- a/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 816c20b23546d31316c9ca450db8a6668ac6216c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 25/37] vfio/pci: Disable INTx in vfio_realize error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [23/28] 2fde4bad00c4286e6bbe24947c2bfd6468fc0ff3 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit adee0da0368f -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:06 2023 +0800 - - vfio/pci: Disable INTx in vfio_realize error path - - When vfio realize fails, INTx isn't disabled if it has been enabled. - This may confuse host side with unhandled interrupt report. 
- - Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 87bd440504..2d059832a4 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3244,6 +3244,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - return; - - out_deregister: -+ if (vdev->interrupt == VFIO_INT_INTx) { -+ vfio_intx_disable(vdev); -+ } - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); - if (vdev->irqchip_change_notifier.notify) { - kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch b/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch deleted file mode 100644 index d05d114..0000000 --- a/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 0b1ab3aacc02e70bfe8440236eb9def426bbe10e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 22/37] vfio/pci: Fix a segfault in vfio_realize -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [20/28] 48b9c1efe295c2672693d9c99f6d11738d2b98d1 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 357bd7932a13 -Author: Zhenzhong Duan -Date: Thu Jun 29 16:40:38 2023 +0800 - - vfio/pci: Fix a segfault in vfio_realize - - The kvm irqchip notifier is only registered if the device supports - INTx, however it's unconditionally removed in vfio realize error - path. 
If the assigned device does not support INTx, this will cause - QEMU to crash when vfio realize fails. Change it to conditionally - remove the notifier only if the notify hook is setup. - - Before fix: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 - Connection closed by foreign host. - - After fix: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 - Error: vfio 0000:81:11.1: xres and yres properties require display=on - (qemu) - - Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6634945a70..d08e6c1a20 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3245,7 +3245,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - - out_deregister: - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); -- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ if (vdev->irqchip_change_notifier.notify) { -+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ } - out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch b/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch deleted file mode 100644 index 1fa725f..0000000 --- a/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 2437a06ff137c4bc856df096e42407c1f50b25b0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 06/37] vfio/pci: Fix a use-after-free issue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration 
support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/28] eca69a7e0a6fb8c1c70be8b91209a53b040e30ba (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit b83b40b61484 -Author: Zhenzhong Duan -Date: Wed May 17 10:46:51 2023 +0800 - - vfio/pci: Fix a use-after-free issue - - vbasedev->name is freed wrongly which leads to garbage VFIO trace log. - Fix it by allocating a dup of vbasedev->name and then free the dup. - - Fixes: 2dca1b37a760 ("vfio/pci: add support for VF token") - Suggested-by: Alex Williamson - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Matthew Rosato - Acked-by: Alex Williamson - Reviewed-by: Philippe Mathieu-Daudé - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6cd3a98c39..7c5e2b5996 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3018,7 +3018,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - qemu_uuid_unparse(&vdev->vf_token, uuid); - name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); - } else { -- name = vbasedev->name; -+ name = g_strdup(vbasedev->name); - } - - ret = vfio_get_device(group, name, vbasedev, errp); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch b/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch deleted file mode 100644 index 3978b96..0000000 --- a/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 9c5016c9b3f9cf66d1b531de829e8b5010962695 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 23/37] vfio/pci: Free leaked timer in vfio_realize error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [21/28] dbaae4e484de4613f7f7735be519b7357627326e (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0cc889c8826c -Author: Zhenzhong Duan -Date: Thu Jun 29 16:40:39 2023 +0800 - - vfio/pci: Free leaked timer in vfio_realize error path - - When vfio_realize fails, the mmap_timer used for INTx optimization - isn't freed. As this timer isn't activated yet, the potential impact - is just a piece of leaked memory. - - Fixes: ea486926b07d ("vfio-pci: Update slow path INTx algorithm timer related") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d08e6c1a20..87bd440504 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3248,6 +3248,9 @@ out_deregister: - if (vdev->irqchip_change_notifier.notify) { - kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); - } -+ if (vdev->intx.mmap_timer) { -+ timer_free(vdev->intx.mmap_timer); -+ } - out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch b/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch deleted file mode 100644 index d937140..0000000 --- a/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch +++ /dev/null @@ -1,141 +0,0 @@ -From db53345dba5682c3ba0bc3fc596b30a98dadb88f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 05/37] vfio/pci: Static Resizable BAR capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater 
-RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/28] 42e9f4b517eb919c77c6fdbe771d9d05a91955bd (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit b5048a4cbfa0 -Author: Alex Williamson -Date: Thu May 4 14:42:48 2023 -0600 - - vfio/pci: Static Resizable BAR capability - - The PCI Resizable BAR (ReBAR) capability is currently hidden from the - VM because the protocol for interacting with the capability does not - support a mechanism for the device to reject an advertised supported - BAR size. However, when assigned to a VM, the act of resizing the - BAR requires adjustment of host resources for the device, which - absolutely can fail. Linux does not currently allow us to reserve - resources for the device independent of the current usage. - - The only writable field within the ReBAR capability is the BAR Size - register. The PCIe spec indicates that when written, the device - should immediately begin to operate with the provided BAR size. The - spec however also notes that software must only write values - corresponding to supported sizes as indicated in the capability and - control registers. Writing unsupported sizes produces undefined - results. Therefore, if the hypervisor were to virtualize the - capability and control registers such that the current size is the - only indicated available size, then a write of anything other than - the current size falls into the category of undefined behavior, - where we can essentially expose the modified ReBAR capability as - read-only. - - This may seem pointless, but users have reported that virtualizing - the capability in this way not only allows guest software to expose - related features as available (even if only cosmetic), but in some - scenarios can resolve guest driver issues. Additionally, no - regressions in behavior have been reported for this change. 
- - A caveat here is that the PCIe spec requires for compatibility that - devices report support for a size in the range of 1MB to 512GB, - therefore if the current BAR size falls outside that range we revert - to hiding the capability. - - Reviewed-by: Cédric Le Goater - Link: https://lore.kernel.org/r/20230505232308.2869912-1-alex.williamson@redhat.com - Signed-off-by: Alex Williamson - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 53 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 579b92a6ed..6cd3a98c39 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2069,6 +2069,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) - return 0; - } - -+static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) -+{ -+ uint32_t ctrl; -+ int i, nbar; -+ -+ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); -+ nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; -+ -+ for (i = 0; i < nbar; i++) { -+ uint32_t cap; -+ int size; -+ -+ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); -+ size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT; -+ -+ /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */ -+ cap = size <= 27 ? 1U << (size + 4) : 0; -+ -+ /* -+ * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one -+ * size in the range 1MB to 512GB. We intend to mask all sizes except -+ * the one currently enabled in the size field, therefore if it's -+ * outside the range, hide the whole capability as this virtualization -+ * trick won't work. If >512GB resizable BARs start to appear, we -+ * might need an opt-in or reservation scheme in the kernel. -+ */ -+ if (!(cap & PCI_REBAR_CAP_SIZES)) { -+ return -EINVAL; -+ } -+ -+ /* Hide all sizes reported in the ctrl reg per above requirement. 
*/ -+ ctrl &= (PCI_REBAR_CTRL_BAR_SIZE | -+ PCI_REBAR_CTRL_NBAR_MASK | -+ PCI_REBAR_CTRL_BAR_IDX); -+ -+ /* -+ * The BAR size field is RW, however we've mangled the capability -+ * register such that we only report a single size, ie. the current -+ * BAR size. A write of an unsupported value is undefined, therefore -+ * the register field is essentially RO. -+ */ -+ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0); -+ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0); -+ } -+ -+ return 0; -+} -+ - static void vfio_add_ext_cap(VFIOPCIDevice *vdev) - { - PCIDevice *pdev = &vdev->pdev; -@@ -2142,9 +2190,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) - case 0: /* kernel masked capability */ - case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ - case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ -- case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */ - trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); - break; -+ case PCI_EXT_CAP_ID_REBAR: -+ if (!vfio_setup_rebar_ecap(vdev, next)) { -+ pcie_add_capability(pdev, cap_id, cap_ver, next, size); -+ } -+ break; - default: - pcie_add_capability(pdev, cap_id, cap_ver, next, size); - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch b/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch deleted file mode 100644 index 7b40e5e..0000000 --- a/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 3022cc31bca5a5441e285c971eaf72b7643b9be0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 03/37] vfio/pci: add support for VF token -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: 
[1/28] ff24284ede2806e21f4f6709d8abd4c4029b7d5c (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 2dca1b37a760 -Author: Minwoo Im -Date: Mon Mar 20 16:35:22 2023 +0900 - - vfio/pci: add support for VF token - - VF token was introduced [1] to kernel vfio-pci along with SR-IOV - support [2]. This patch adds support VF token among PF and VF(s). To - passthu PCIe VF to a VM, kernel >= v5.7 needs this. - - It can be configured with UUID like: - - -device vfio-pci,host=DDDD:BB:DD:F,vf-token=,... - - [1] https://lore.kernel.org/linux-pci/158396393244.5601.10297430724964025753.stgit@gimli.home/ - [2] https://lore.kernel.org/linux-pci/158396044753.5601.14804870681174789709.stgit@gimli.home/ - - Cc: Alex Williamson - Signed-off-by: Minwoo Im - Reviewed-by: Klaus Jensen - Link: https://lore.kernel.org/r/20230320073522epcms2p48f682ecdb73e0ae1a4850ad0712fd780@epcms2p4 - Signed-off-by: Alex Williamson - -Conflicts: - - hw/vfio/pci.c - context changes in vfio_realize () due to redhat commit 267071d16b23 - ("vfio: cap number of devices that can be assigned") - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 13 ++++++++++++- - hw/vfio/pci.h | 1 + - 2 files changed, 13 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index a779053be3..579b92a6ed 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2859,6 +2859,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - int groupid; - int ret, i = 0; - bool is_mdev; -+ char uuid[UUID_FMT_LEN]; -+ char *name; - - if (device_limit && device_limit != vdev->assigned_device_limit) { - error_setg(errp, "Assigned device limit has been redefined. 
" -@@ -2960,7 +2962,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - goto error; - } - -- ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); -+ if (!qemu_uuid_is_null(&vdev->vf_token)) { -+ qemu_uuid_unparse(&vdev->vf_token, uuid); -+ name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); -+ } else { -+ name = vbasedev->name; -+ } -+ -+ ret = vfio_get_device(group, name, vbasedev, errp); -+ g_free(name); - if (ret) { - vfio_put_group(group); - goto error; -@@ -3292,6 +3302,7 @@ static void vfio_instance_init(Object *obj) - - static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), -+ DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), - DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), - DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, - vbasedev.pre_copy_dirty_page_tracking, -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 45235d38ba..10530743ad 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -137,6 +137,7 @@ struct VFIOPCIDevice { - VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ - void *igd_opregion; - PCIHostDeviceAddress host; -+ QemuUUID vf_token; - EventNotifier err_notifier; - EventNotifier req_notifier; - int (*resetfn)(struct VFIOPCIDevice *); --- -2.39.3 - diff --git a/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch b/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch deleted file mode 100644 index 3282c24..0000000 --- a/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch +++ /dev/null @@ -1,138 +0,0 @@ -From ac54f5f746782da89ab674733af5622e524b58eb Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 2 Jun 2023 18:27:35 +0200 -Subject: [PATCH 4/6] vhost: fix vhost_dev_enable_notifiers() error case -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 176: vhost: fix 
vhost_dev_enable_notifiers() error case -RH-Jira: RHEL-330 -RH-Acked-by: MST -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Jason Wang -RH-Commit: [1/1] fd30d7501be59f7e5b9d6fc5ed84efcc4037d08e (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-330 - -in vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true) -fails, we call vhost_dev_disable_notifiers() that executes -virtio_bus_set_host_notifier(false) on all queues, even on queues that -have failed to be initialized. - -This triggers a core dump in memory_region_del_eventfd(): - - virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24) - vhost VQ 1 notifier binding failed: 24 - .../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. - -Fix the problem by providing to vhost_dev_disable_notifiers() the -number of queues to disable. - -Fixes: 8771589b6f81 ("vhost: simplify vhost_dev_enable_notifiers") -Cc: longpeng2@huawei.com -Signed-off-by: Laurent Vivier -Message-Id: <20230602162735.3670785-1-lvivier@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit 92099aa4e9a3bb6856c290afaf41c76f9e3dd9fd) ---- - hw/virtio/vhost.c | 65 ++++++++++++++++++++++++++--------------------- - 1 file changed, 36 insertions(+), 29 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index a266396576..ae0a033e60 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -1545,6 +1545,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) - memset(hdev, 0, sizeof(struct vhost_dev)); - } - -+static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, -+ VirtIODevice *vdev, -+ unsigned int nvqs) -+{ -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -+ int i, r; -+ -+ /* -+ * Batch all the host notifiers in a single transaction to avoid -+ * quadratic time complexity in address_space_update_ioeventfds(). -+ */ -+ memory_region_transaction_begin(); -+ -+ for (i = 0; i < nvqs; ++i) { -+ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, -+ false); -+ if (r < 0) { -+ error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); -+ } -+ assert(r >= 0); -+ } -+ -+ /* -+ * The transaction expects the ioeventfds to be open when it -+ * commits. Do it now, before the cleanup loop. -+ */ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; ++i) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); -+ } -+ virtio_device_release_ioeventfd(vdev); -+} -+ - /* Stop processing guest IO notifications in qemu. - * Start processing them in vhost in kernel. 
- */ -@@ -1574,7 +1608,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - if (r < 0) { - error_report("vhost VQ %d notifier binding failed: %d", i, -r); - memory_region_transaction_commit(); -- vhost_dev_disable_notifiers(hdev, vdev); -+ vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); - return r; - } - } -@@ -1591,34 +1625,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - */ - void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - { -- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -- int i, r; -- -- /* -- * Batch all the host notifiers in a single transaction to avoid -- * quadratic time complexity in address_space_update_ioeventfds(). -- */ -- memory_region_transaction_begin(); -- -- for (i = 0; i < hdev->nvqs; ++i) { -- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, -- false); -- if (r < 0) { -- error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); -- } -- assert (r >= 0); -- } -- -- /* -- * The transaction expects the ioeventfds to be open when it -- * commits. Do it now, before the cleanup loop. -- */ -- memory_region_transaction_commit(); -- -- for (i = 0; i < hdev->nvqs; ++i) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); -- } -- virtio_device_release_ioeventfd(vdev); -+ vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); - } - - /* Test and clear event pending status. 
--- -2.39.3 - diff --git a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch deleted file mode 100644 index fd29eb7..0000000 --- a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 4e30ca551fb3740a428017a0debf0a6aab976639 Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Mon, 19 Jun 2023 12:22:09 +0530 -Subject: [PATCH 6/6] vhost-vdpa: do not cleanup the vdpa/vhost-net structures - if peer nic is present - -RH-Author: Ani Sinha -RH-MergeRequest: 174: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present -RH-Bugzilla: 2128929 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] c70d4e5fd93256326d318e0b507db6b9eb93ad86 (anisinha/centos-qemu-kvm) - -When a peer nic is still attached to the vdpa backend, it is too early to free -up the vhost-net and vdpa structures. If these structures are freed here, then -QEMU crashes when the guest is being shut down. The following call chain -would result in an assertion failure since the pointer returned from -vhost_vdpa_get_vhost_net() would be NULL: - -do_vm_stop() -> vm_state_notify() -> virtio_set_status() -> -virtio_net_vhost_status() -> get_vhost_net(). - -Therefore, we defer freeing up the structures until at guest shutdown -time when qemu_cleanup() calls net_cleanup() which then calls -qemu_del_net_client() which would eventually call vhost_vdpa_cleanup() -again to free up the structures. This time, the loop in net_cleanup() -ensures that vhost_vdpa_cleanup() will be called one last time when -all the peer nics are detached and freed. - -All unit tests pass with this change. 
- -CC: imammedo@redhat.com -CC: jusual@redhat.com -CC: mst@redhat.com -Fixes: CVE-2023-3301 -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 -Signed-off-by: Ani Sinha -Message-Id: <20230619065209.442185-1-anisinha@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit a0d7215e339b61c7d7a7b3fcf754954d80d93eb8) ---- - net/vhost-vdpa.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 99904a0da7..8c8900f0f4 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -184,6 +184,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - -+ /* -+ * If a peer NIC is attached, do not cleanup anything. -+ * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup() -+ * when the guest is shutting down. -+ */ -+ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { -+ return; -+ } - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->status); - if (s->vhost_net) { --- -2.39.3 - diff --git a/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch b/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch deleted file mode 100644 index 3711949..0000000 --- a/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 3b51a7b84ea21360c6d551284aecb8b6f371e888 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 4 Jul 2023 09:19:31 +0200 -Subject: [PATCH 9/9] vhost-vdpa: mute unaligned memory error report -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 193: vhost-vdpa: mute unaligned memory error report -RH-Bugzilla: 2141965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/1] 60f5385d41269ce9310e1e8e0a2f1106e3a16ada (lvivier/qemu-kvm-centos) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2141965 - -With 
TPM CRM device, vhost-vdpa reports an error when it tries -to register a listener for a non aligned memory region: - - qemu-system-x86_64: vhost_vdpa_listener_region_add received unaligned region - qemu-system-x86_64: vhost_vdpa_listener_region_del received unaligned region - -This error can be confusing for the user whereas we only need to skip -the region (as it's already done after the error_report()) - -Rather than introducing a special case for TPM CRB memory section -to not display the message in this case, simply replace the -error_report() by a trace function (with more information, like the -memory region name). - -Signed-off-by: Laurent Vivier -Message-Id: <20230704071931.575888-2-lvivier@redhat.com> -Reviewed-by: David Hildenbrand -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 77812aa7b1fdf8f547c35a7f9a4eb1cbf3a073db) ---- - hw/virtio/trace-events | 2 ++ - hw/virtio/vhost-vdpa.c | 8 ++++++-- - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 68b752e304..300dec8d3e 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -34,7 +34,9 @@ vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_ - vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 - vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 -+vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 - 
vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" -+vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 - vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 - vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 - vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index bc6bad23d5..c04f14420d 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -202,7 +202,9 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { -- error_report("%s received unaligned region", __func__); -+ trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, -+ section->offset_within_address_space & ~TARGET_PAGE_MASK, -+ section->offset_within_region & ~TARGET_PAGE_MASK); - return; - } - -@@ -281,7 +283,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { -- error_report("%s received unaligned region", __func__); -+ trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, -+ section->offset_within_address_space & ~TARGET_PAGE_MASK, -+ section->offset_within_region & ~TARGET_PAGE_MASK); - return; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch b/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch new file mode 100644 index 0000000..7183a65 --- /dev/null +++ b/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch 
@@ -0,0 +1,122 @@ +From 97d039841728570b54d11ee7e5322743f519d861 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 16 May 2024 12:40:22 +0400 +Subject: [PATCH 3/4] virtio-gpu: fix v2 migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 246: virtio-gpu: fix v2 migration +RH-Jira: RHEL-34621 +RH-Acked-by: Peter Xu +RH-Acked-by: Thomas Huth +RH-Commit: [1/2] 187370bc6198a4ed0e4763314f8113ffcd21eb36 (marcandre.lureau-rh/qemu-kvm-centos) + +Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke +forward/backward version migration. Versioning of nested VMSD structures +is not straightforward, as the wire format doesn't have nested +structures versions. Introduce x-scanout-vmstate-version and a field +test to save/load appropriately according to the machine version. + +Fixes: dfcf74fa ("virtio-gpu: fix scanout migration post-load") +Signed-off-by: Marc-André Lureau +Signed-off-by: Peter Xu +Reviewed-by: Fiona Ebner +Tested-by: Fiona Ebner +[fixed long lines] +Signed-off-by: Fabiano Rosas + +Jira: https://issues.redhat.com/browse/RHEL-34621 +Signed-off-by: Marc-André Lureau +(cherry picked from commit 40a23ef643664b5c1021a9789f9d680b6294fb50) +--- + hw/core/machine.c | 1 + + hw/display/virtio-gpu.c | 30 ++++++++++++++++++++++-------- + include/hw/virtio/virtio-gpu.h | 1 + + 3 files changed, 24 insertions(+), 8 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0f256d9633..cf1d7faaaf 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,7 @@ GlobalProperty hw_compat_8_2[] = { + { "migration", "zero-page-detection", "legacy"}, + { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, ++ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, + }; + const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2); + +diff --git a/hw/display/virtio-gpu.c 
b/hw/display/virtio-gpu.c +index ae831b6b3e..d60b1b2973 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1166,10 +1166,17 @@ static void virtio_gpu_cursor_bh(void *opaque) + virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq); + } + ++static bool scanout_vmstate_after_v2(void *opaque, int version) ++{ ++ struct VirtIOGPUBase *base = container_of(opaque, VirtIOGPUBase, scanout); ++ struct VirtIOGPU *gpu = container_of(base, VirtIOGPU, parent_obj); ++ ++ return gpu->scanout_vmstate_version >= 2; ++} ++ + static const VMStateDescription vmstate_virtio_gpu_scanout = { + .name = "virtio-gpu-one-scanout", +- .version_id = 2, +- .minimum_version_id = 1, ++ .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout), + VMSTATE_UINT32(width, struct virtio_gpu_scanout), +@@ -1181,12 +1188,18 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = { + VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout), + VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout), + VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout), +- VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2), ++ VMSTATE_UINT32_TEST(fb.format, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.bytes_pp, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.width, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.height, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.stride, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.offset, struct 
virtio_gpu_scanout, ++ scanout_vmstate_after_v2), + VMSTATE_END_OF_LIST() + }, + }; +@@ -1659,6 +1672,7 @@ static Property virtio_gpu_properties[] = { + DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags, + VIRTIO_GPU_FLAG_BLOB_ENABLED, false), + DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0), ++ DEFINE_PROP_UINT8("x-scanout-vmstate-version", VirtIOGPU, scanout_vmstate_version, 2), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h +index ed44cdad6b..842315d51d 100644 +--- a/include/hw/virtio/virtio-gpu.h ++++ b/include/hw/virtio/virtio-gpu.h +@@ -177,6 +177,7 @@ typedef struct VGPUDMABuf { + struct VirtIOGPU { + VirtIOGPUBase parent_obj; + ++ uint8_t scanout_vmstate_version; + uint64_t conf_max_hostmem; + + VirtQueue *ctrl_vq; +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch b/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch deleted file mode 100644 index acfb3ae..0000000 --- a/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 08c8af80dbd03b46a6a8397ef0c41cda3e6de22c Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 5 Jul 2023 18:51:17 +0200 -Subject: [PATCH 01/37] virtio-iommu: Fix 64kB host page size VFIO device - assignment - -RH-Author: Eric Auger -RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes -RH-Bugzilla: 2211609 2211634 -RH-Acked-by: Gavin Shan -RH-Acked-by: Sebastian Ott -RH-Commit: [1/2] b48db1c964559505dda4c6c9a3b79d68207b25eb (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211634 - -When running on a 64kB page size host and protecting a VFIO device -with the virtio-iommu, qemu crashes with this kind of message: - -qemu-kvm: virtio-iommu page mask 0xfffffffffffff000 is incompatible -with mask 0x20010000 -qemu: hardware error: vfio: DMA mapping failed, unable to continue - -This is 
due to the fact the IOMMU MR corresponding to the VFIO device -is enabled very late on domain attach, after the machine init. -The device reports a minimal 64kB page size but it is too late to be -applied. virtio_iommu_set_page_size_mask() fails and this causes -vfio_listener_region_add() to end up with hw_error(); - -To work around this issue, we transiently enable the IOMMU MR on -machine init to collect the page size requirements and then restore -the bypass state. - -Fixes: 90519b9053 ("virtio-iommu: Add bypass mode support to assigned device") -Signed-off-by: Eric Auger - -Message-Id: <20230705165118.28194-2-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -Tested-by: Jean-Philippe Brucker -Reviewed-by: Zhenzhong Duan -(cherry picked from commit 94df5b2180d61fb2ee2b04cc007981e58b6479a9) -Signed-off-by: Eric Auger ---- - hw/virtio/trace-events | 1 + - hw/virtio/virtio-iommu.c | 31 +++++++++++++++++++++++++++++-- - include/hw/virtio/virtio-iommu.h | 2 ++ - 3 files changed, 32 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 8f8d05cf9b..68b752e304 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -131,6 +131,7 @@ virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "m - virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" - virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" - virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" -+virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 - - # virtio-mem.c - virtio_mem_send_response(uint16_t type) "type=%" PRIu16 -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 1cd258135d..542679b321 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -24,6 
+24,7 @@ - #include "hw/virtio/virtio.h" - #include "sysemu/kvm.h" - #include "sysemu/reset.h" -+#include "sysemu/sysemu.h" - #include "qapi/error.h" - #include "qemu/error-report.h" - #include "trace.h" -@@ -1106,12 +1107,12 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - } - - /* -- * After the machine is finalized, we can't change the mask anymore. If by -+ * Once the granule is frozen we can't change the mask anymore. If by - * chance the hotplugged device supports the same granule, we can still - * accept it. Having a different masks is possible but the guest will use - * sub-optimal block sizes, so warn about it. - */ -- if (phase_check(PHASE_MACHINE_READY)) { -+ if (s->granule_frozen) { - int new_granule = ctz64(new_mask); - int cur_granule = ctz64(cur_mask); - -@@ -1146,6 +1147,28 @@ static void virtio_iommu_system_reset(void *opaque) - - } - -+static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) -+{ -+ VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); -+ int granule; -+ -+ if (likely(s->config.bypass)) { -+ /* -+ * Transient IOMMU MR enable to collect page_size_mask requirements -+ * through memory_region_iommu_set_page_size_mask() called by -+ * VFIO region_add() callback -+ */ -+ s->config.bypass = false; -+ virtio_iommu_switch_address_space_all(s); -+ /* restore default */ -+ s->config.bypass = true; -+ virtio_iommu_switch_address_space_all(s); -+ } -+ s->granule_frozen = true; -+ granule = ctz64(s->config.page_size_mask); -+ trace_virtio_iommu_freeze_granule(BIT(granule)); -+} -+ - static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(dev); -@@ -1189,6 +1212,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) - error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); - } - -+ s->machine_done.notify = virtio_iommu_freeze_granule; -+ qemu_add_machine_init_done_notifier(&s->machine_done); -+ - 
qemu_register_reset(virtio_iommu_system_reset, s); - } - -@@ -1198,6 +1224,7 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) - VirtIOIOMMU *s = VIRTIO_IOMMU(dev); - - qemu_unregister_reset(virtio_iommu_system_reset, s); -+ qemu_remove_machine_init_done_notifier(&s->machine_done); - - g_hash_table_destroy(s->as_by_busptr); - if (s->domains) { -diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h -index 2ad5ee320b..a93fc5383e 100644 ---- a/include/hw/virtio/virtio-iommu.h -+++ b/include/hw/virtio/virtio-iommu.h -@@ -61,6 +61,8 @@ struct VirtIOIOMMU { - QemuRecMutex mutex; - GTree *endpoints; - bool boot_bypass; -+ Notifier machine_done; -+ bool granule_frozen; - }; - - #endif --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch b/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch deleted file mode 100644 index 7934a12..0000000 --- a/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 643d93343759a350fe0f6327d308bf6a93c79d25 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 5 Jul 2023 18:51:18 +0200 -Subject: [PATCH 02/37] virtio-iommu: Rework the traces in - virtio_iommu_set_page_size_mask() - -RH-Author: Eric Auger -RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes -RH-Bugzilla: 2211609 2211634 -RH-Acked-by: Gavin Shan -RH-Acked-by: Sebastian Ott -RH-Commit: [2/2] 0af7078dde158f07c83e2b293adc5d9d475688ae (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211609 - -The current error messages in virtio_iommu_set_page_size_mask() -sound quite similar for different situations and miss the IOMMU -memory region that causes the issue. - -Clarify them and rework the comment. - -Also remove the trace when the new page_size_mask is not applied as -the current frozen granule is kept. 
This message is rather confusing -for the end user and anyway the current granule would have been used -by the driver. - -Signed-off-by: Eric Auger -Reviewed-by: Zhenzhong Duan -Message-Id: <20230705165118.28194-3-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -Tested-by: Jean-Philippe Brucker -(cherry picked from commit 587a7641d53055054d68d67d94c9408ef808f127) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 19 +++++++------------ - 1 file changed, 7 insertions(+), 12 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 542679b321..421e2a944f 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -1101,29 +1101,24 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - new_mask); - - if ((cur_mask & new_mask) == 0) { -- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 -- " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask); -+ error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 -+ " incompatible with currently supported mask 0x%"PRIx64, -+ mr->parent_obj.name, new_mask, cur_mask); - return -1; - } - - /* - * Once the granule is frozen we can't change the mask anymore. If by - * chance the hotplugged device supports the same granule, we can still -- * accept it. Having a different masks is possible but the guest will use -- * sub-optimal block sizes, so warn about it. -+ * accept it. 
- */ - if (s->granule_frozen) { -- int new_granule = ctz64(new_mask); - int cur_granule = ctz64(cur_mask); - -- if (new_granule != cur_granule) { -- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 -- " is incompatible with mask 0x%"PRIx64, cur_mask, -- new_mask); -+ if (!(BIT(cur_granule) & new_mask)) { -+ error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", -+ mr->parent_obj.name, BIT_ULL(cur_granule)); - return -1; -- } else if (new_mask != cur_mask) { -- warn_report("virtio-iommu page mask 0x%"PRIx64 -- " does not match 0x%"PRIx64, cur_mask, new_mask); - } - return 0; - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch b/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch deleted file mode 100644 index 638ae98..0000000 --- a/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 59cd85621b1b14ada843ea0562cc76b6a7c93df4 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 18 Jul 2023 20:21:36 +0200 -Subject: [PATCH 08/14] virtio-iommu: Standardize granule extraction and - formatting - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas Huth -RH-Acked-by: Peter Xu -RH-Commit: [2/3] 48784ef2a19174518f66479dcb532230bffe8bf1 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -At several locations we compute the granule from the config -page_size_mask using ctz() and then format it in traces using -BIT(). As the page_size_mask is 64b we should use ctz64 and -BIT_ULL() for formatting. We failed to be consistent. - -Note the page_size_mask is garanteed to be non null. The spec -mandates the device to set at least one bit, so ctz64 cannot -return 64. 
This is garanteed by the fact the device -initializes the page_size_mask to qemu_target_page_mask() -and then the page_size_mask is further constrained by -virtio_iommu_set_page_size_mask() callback which can't -result in a new mask being null. So if Coverity complains -round those ctz64/BIT_ULL with CID 1517772 this is a false -positive - -Signed-off-by: Eric Auger -Fixes: 94df5b2180 ("virtio-iommu: Fix 64kB host page size VFIO device assignment") -Message-Id: <20230718182136.40096-1-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -(cherry picked from commit 1084feddc6a677cdfdde56936bfb97cf32cc4dee) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 17ce630200..17b3dcd158 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -854,17 +854,19 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - VirtIOIOMMUEndpoint *ep; - uint32_t sid, flags; - bool bypass_allowed; -+ int granule; - bool found; - int i; - - interval.low = addr; - interval.high = addr + 1; -+ granule = ctz64(s->config.page_size_mask); - - IOMMUTLBEntry entry = { - .target_as = &address_space_memory, - .iova = addr, - .translated_addr = addr, -- .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1, -+ .addr_mask = BIT_ULL(granule) - 1, - .perm = IOMMU_NONE, - }; - -@@ -1117,7 +1119,7 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - if (s->granule_frozen) { - int cur_granule = ctz64(cur_mask); - -- if (!(BIT(cur_granule) & new_mask)) { -+ if (!(BIT_ULL(cur_granule) & new_mask)) { - error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", - mr->parent_obj.name, BIT_ULL(cur_granule)); - return -1; -@@ -1163,7 +1165,7 @@ static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) 
- } - s->granule_frozen = true; - granule = ctz64(s->config.page_size_mask); -- trace_virtio_iommu_freeze_granule(BIT(granule)); -+ trace_virtio_iommu_freeze_granule(BIT_ULL(granule)); - } - - static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch b/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch deleted file mode 100644 index 119ea84..0000000 --- a/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 4fe096a6fad61ab721fd29324d48383c7f427ac9 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Mon, 5 Jun 2023 16:21:25 +0200 -Subject: [PATCH 7/9] virtio-net: correctly report maximum tx_queue_size value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 191: virtio-net: correctly report maximum tx_queue_size value -RH-Bugzilla: 2040509 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/1] afb944c6d75fe476ac86fe267b1cca5f272dfbbd (lvivier/qemu-kvm-centos) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040509 - -Maximum value for tx_queue_size depends on the backend type. -1024 for vDPA/vhost-user, 256 for all the others. - -The value is returned by virtio_net_max_tx_queue_size() to set the -parameter: - - n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), - n->net_conf.tx_queue_size); - -But the parameter checking uses VIRTQUEUE_MAX_SIZE (1024). - -So the parameter is silently ignored and ethtool reports a different -value than the one provided by the user. - - ... -netdev tap,... -device virtio-net,tx_queue_size=1024 - - # ethtool -g enp0s2 - Ring parameters for enp0s2: - Pre-set maximums: - RX: 256 - RX Mini: n/a - RX Jumbo: n/a - TX: 256 - Current hardware settings: - RX: 256 - RX Mini: n/a - RX Jumbo: n/a - TX: 256 - - ... 
-netdev vhost-user,... -device virtio-net,tx_queue_size=2048 - - Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 - -With this patch the correct maximum value is checked and displayed. - -For vDPA/vhost-user: - - Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 - -For all the others: - - Invalid tx_queue_size (= 512), must be a power of 2 between 256 and 256 - -Fixes: 2eef278b9e63 ("virtio-net: fix tx queue size for !vhost-user") -Cc: mst@redhat.com -Cc: qemu-stable@nongnu.org -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 4271f4038372f174dbafffacca1a748d058a03ba) ---- - hw/net/virtio-net.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 447f669921..ae1e6a5e3d 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3628,12 +3628,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) - } - - if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || -- n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || -+ n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || - !is_power_of_2(n->net_conf.tx_queue_size)) { - error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " - "must be a power of 2 between %d and %d", - n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, -- VIRTQUEUE_MAX_SIZE); -+ virtio_net_max_tx_queue_size(n)); - virtio_cleanup(vdev); - return; - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch b/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch new file mode 100644 index 0000000..2fba53d --- /dev/null +++ b/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch @@ -0,0 +1,49 @@ +From 3dd1412176a8ee6c06b5d41aa00ca49b535d99b7 Mon Sep 17 00:00:00 2001 +From: "Michael S. 
Tsirkin" +Date: Wed, 24 Jul 2024 06:48:59 -0400 +Subject: [PATCH 092/100] virtio-rng: block max-bytes=0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 259: virtio-rng: block max-bytes=0 +RH-Jira: RHEL-50336 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Thomas Huth +RH-Acked-by: Eric Auger +RH-Commit: [1/1] 6d9852cc7cf7fdf49521b6301ceda26e11b1291f (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-50336 + +with max-bytes set to 0, quota is 0 and so device does not work. +block this to avoid user confusion + +Message-Id: <73a89a42d82ec8b47358f25119b87063e4a6ea57.1721818306.git.mst@redhat.com> +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit 024d046bf41b5256adec671085bcee767a6da125) +Signed-off-by: Laurent Vivier +--- + hw/virtio/virtio-rng.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c +index f74efffef7..7cf31da071 100644 +--- a/hw/virtio/virtio-rng.c ++++ b/hw/virtio/virtio-rng.c +@@ -184,8 +184,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) + + /* Workaround: Property parsing does not enforce unsigned integers, + * So this is a hack to reject such numbers. 
*/ +- if (vrng->conf.max_bytes > INT64_MAX) { +- error_setg(errp, "'max-bytes' parameter must be non-negative, " ++ if (vrng->conf.max_bytes == 0 || ++ vrng->conf.max_bytes > INT64_MAX) { ++ error_setg(errp, "'max-bytes' parameter must be positive, " + "and less than 2^63"); + return; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch b/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch deleted file mode 100644 index e1eef6d..0000000 --- a/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 63e2339a6f38706c6fc5eb251426812520db6a6d Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:37 -0400 -Subject: [PATCH 03/56] vl.c: Create late backends before migration object -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [2/50] 7209bb94faa48650388be8fef08c77afd26517d8 (peterx/qemu-kvm) - -The migration object may want to check against different types of memory -when initialized. Delay the creation to be after late backends. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: David Hildenbrand -Signed-off-by: Juan Quintela -(cherry picked from commit cb9d8b8ce1aaf38f53295fc59ec1b8b7eb4338d2) -Signed-off-by: Peter Xu ---- - softmmu/vl.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/softmmu/vl.c b/softmmu/vl.c -index ad4173138d..a44b49430b 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -3592,14 +3592,19 @@ void qemu_init(int argc, char **argv) - machine_class->name, machine_class->deprecation_reason); - } - -+ /* -+ * Create backends before creating migration objects, so that it can -+ * check against compatibilities on the backend memories (e.g. postcopy -+ * over memory-backend-file objects). -+ */ -+ qemu_create_late_backends(); -+ - /* - * Note: creates a QOM object, must run only after global and - * compat properties have been set up. - */ - migration_object_init(); - -- qemu_create_late_backends(); -- - /* parse features once if machine provides default cpu_type */ - current_machine->cpu_type = machine_class->default_cpu_type; - if (cpu_option) { --- -2.39.1 - diff --git a/SOURCES/qemu-ga.sysconfig b/SOURCES/qemu-ga.sysconfig index a78b428..736b471 100644 --- a/SOURCES/qemu-ga.sysconfig +++ b/SOURCES/qemu-ga.sysconfig @@ -1,11 +1,19 @@ # This is a systemd environment file, not a shell script. # It provides settings for "/lib/systemd/system/qemu-guest-agent.service". -# Comma-separated blocked RPCs to disable, or empty list to enable all. +# Guest agent command with comma-separated blocked RPCs to disable, +# or empty list to enable all. # # You can get the list of RPC commands using "qemu-ga --block-rpcs='?'". # There should be no spaces between commas and commands in the block list. 
-BLOCK_RPCS=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status +# FILTER_RPC_ARGS="--block-rpcs=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status" + +# Guest agent command with comma-separated allowed RPCs to enable, +# or empty list to disable all. +# +# You can get the list of RPC commands using "qemu-ga --allow-rpcs='?'". +# There should be no spaces between commas and commands in the allow list. +FILTER_RPC_ARGS="--allow-rpcs=guest-sync-delimited,guest-sync,guest-ping,guest-get-time,guest-set-time,guest-info,guest-shutdown,guest-fsfreeze-status,guest-fsfreeze-freeze,guest-fsfreeze-freeze-list,guest-fsfreeze-thaw,guest-fstrim,guest-suspend-disk,guest-suspend-ram,guest-suspend-hybrid,guest-network-get-interfaces,guest-get-vcpus,guest-set-vcpus,guest-get-disks,guest-get-fsinfo,guest-set-user-password,guest-get-memory-blocks,guest-set-memory-blocks,guest-get-memory-block-info,guest-get-host-name,guest-get-users,guest-get-timezone,guest-get-osinfo,guest-get-devices,guest-ssh-get-authorized-keys,guest-ssh-add-authorized-keys,guest-ssh-remove-authorized-keys,guest-get-diskstats,guest-get-cpustats" # Fsfreeze hook script specification. 
# diff --git a/SOURCES/qemu-guest-agent.service b/SOURCES/qemu-guest-agent.service index 244da02..f74ebd0 100644 --- a/SOURCES/qemu-guest-agent.service +++ b/SOURCES/qemu-guest-agent.service @@ -10,7 +10,7 @@ EnvironmentFile=/etc/sysconfig/qemu-ga ExecStart=/usr/bin/qemu-ga \ --method=virtio-serial \ --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ - --block-rpcs=${BLOCK_RPCS} \ + ${FILTER_RPC_ARGS} \ -F${FSFREEZE_HOOK_PATHNAME} Restart=always RestartSec=0 diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 1d3f312..56123b7 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -58,7 +58,7 @@ %global tools_only 1 %endif -%ifnarch %{ix86} x86_64 +%ifnarch %{ix86} x86_64 aarch64 %global have_usbredir 0 %endif @@ -148,8 +148,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 8.0.0 -Release: 13%{?rcrel}%{?dist}%{?cc_suffix}.inferit +Version: 9.0.0 +Release: 10%{?rcrel}%{?dist}%{?cc_suffix}.inferit # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -186,366 +186,252 @@ Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0015: 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0016: 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -Patch0017: 0017-Add-RHEL-9.2.0-compat-structure.patch -Patch0018: 0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch -Patch0019: 0019-Disable-unwanted-new-devices.patch -# For bz#2087047 - Disk detach is unsuccessful while the guest is still booting -Patch20: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch -# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs -Patch21: kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch -# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs -Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch23: kvm-migration-Handle-block-device-inactivation-failures-.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch24: kvm-migration-Minor-control-flow-simplification.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch25: kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch26: kvm-util-mmap-alloc-qemu_fd_getfs.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch27: kvm-vl.c-Create-late-backends-before-migration-object.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch28: 
kvm-migration-postcopy-Detect-file-system-on-dest-host.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch29: kvm-migration-mark-mixed-functions-that-can-suspend.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch30: kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch31: kvm-migration-remove-extra-whitespace-character-for-code.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch32: kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch33: kvm-migration-Update-atomic-stats-out-of-the-mutex.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch34: kvm-migration-Make-multifd_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch35: kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch36: kvm-migration-Make-precopy_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch37: kvm-migration-Make-downtime_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch38: kvm-migration-Make-dirty_sync_count-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch39: kvm-migration-Make-postcopy_requests-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch40: kvm-migration-Rename-duplicate-to-zero_pages.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch41: kvm-migration-Rename-normal-to-normal_pages.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared 
memory -Patch42: kvm-migration-rename-enabled_capabilities-to-capabilitie.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch43: kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch44: kvm-migration-move-migration_global_dump-to-migration-hm.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch45: kvm-spice-move-client_migrate_info-command-to-ui.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch46: kvm-migration-Create-migrate_cap_set.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch47: kvm-migration-Create-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch48: kvm-migration-Move-migrate_colo_enabled-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch49: kvm-migration-Move-migrate_use_compression-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch50: kvm-migration-Move-migrate_use_events-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch51: kvm-migration-Move-migrate_use_multifd-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch52: kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch53: kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch54: kvm-migration-Move-migrate_use_block-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch55: kvm-migration-Move-migrate_use_return-to-options.c.patch -# For bz#2057267 - 
Migration with postcopy fail when vm set with shared memory -Patch56: kvm-migration-Create-migrate_rdma_pin_all-function.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch57: kvm-migration-Move-migrate_caps_check-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch58: kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch59: kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch60: kvm-migration-Move-migrate_cap_set-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch61: kvm-migration-Move-parameters-functions-to-option.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch62: kvm-migration-Use-migrate_max_postcopy_bandwidth.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch63: kvm-migration-Move-migrate_use_block_incremental-to-opti.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch64: kvm-migration-Create-migrate_throttle_trigger_threshold.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch65: kvm-migration-Create-migrate_checkpoint_delay.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch66: kvm-migration-Create-migrate_max_cpu_throttle.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch67: kvm-migration-Move-migrate_announce_params-to-option.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch68: kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch69: 
kvm-migration-Create-migrate_cpu_throttle_increment-func.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch70: kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch71: kvm-migration-Move-migrate_postcopy-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch72: kvm-migration-Create-migrate_max_bandwidth-function.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch73: kvm-migration-Move-migrate_use_tls-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch74: kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch75: kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch76: kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch77: kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch78: kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch79: kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch80: kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch81: kvm-graph-lock-Disable-locking-for-now.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch82: kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch -# For bz#2186725 - 
Qemu hang when commit during fio running(iothread enable) -Patch83: kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch84: kvm-memory-prevent-dma-reentracy-issues.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch85: kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch86: kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch87: kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch88: kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch89: kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch90: kvm-raven-disable-reentrancy-detection-for-iomem.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch91: kvm-apic-disable-reentrancy-detection-for-apic-msi.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch92: kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch93: 
kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch94: kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch95: kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch96: kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch -# For bz#2189423 - Failed to migrate VM from rhel 9.3 to rhel 9.2 -Patch97: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch -# For bz#2196289 - Fix number of ready channels on multifd -Patch98: kvm-multifd-Fix-the-number-of-channels-ready.patch -# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part -Patch99: kvm-util-async-teardown-wire-up-query-command-line-optio.patch -# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part -Patch100: kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch -# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode -Patch101: kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch -# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode -Patch102: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch -# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers -Patch103: kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch -# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers -Patch104: kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular 
CPU-to-NUMA association -Patch105: kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association -Patch106: kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association -Patch107: kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch -# For RHEL-330 - [virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed -Patch108: kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch -# For bz#2218644 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) -Patch109: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch -# For bz#2128929 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest -Patch110: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch -# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" -# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package -Patch111: kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch -# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" -# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package -Patch112: kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch113: kvm-vfio-pci-add-support-for-VF-token.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch114: 
kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch115: kvm-vfio-pci-Static-Resizable-BAR-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch116: kvm-vfio-pci-Fix-a-use-after-free-issue.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch117: kvm-util-vfio-helpers-Use-g_file_read_link.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch118: kvm-migration-Make-all-functions-check-have-the-same-for.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch119: kvm-migration-Move-migration_properties-to-options.c.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch120: kvm-migration-Add-switchover-ack-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch121: kvm-migration-Implement-switchover-ack-logic.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch122: kvm-migration-Enable-switchover-ack-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch123: kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch124: kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch125: kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch126: kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch127: kvm-vfio-Implement-a-common-device-info-helper.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch128: kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch129: kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch130: kvm-vfio-migration-Reset-bytes_transferred-properly.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch131: kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch -# For 
bz#2192818 - [VFIO LM] Live migration -Patch132: kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch133: kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch134: kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch135: kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch136: kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch137: kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch138: kvm-vfio-migration-Remove-print-of-Migration-disabled.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch139: kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch140: kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch141: kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch142: kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch143: kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch144: kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch -# For bz#2222579 - PNG screendump doesn't save screen correctly -Patch145: kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch -# For bz#2213317 - Enable libblkio-based block drivers in QEMU -Patch146: kvm-block-blkio-fix-module_block.py-parsing.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug 
it repeatly -Patch147: kvm-scsi-fetch-unit-attention-when-creating-the-request.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly -Patch148: kvm-scsi-cleanup-scsi_clear_unit_attention.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly -Patch149: kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch -# For RHEL-794 - Backport s390x fixes from QEMU 8.1 -Patch150: kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch151: kvm-multifd-Create-property-multifd-flush-after-each-sec.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch152: kvm-multifd-Protect-multifd_send_sync_main-calls.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch153: kvm-multifd-Only-flush-once-each-full-round-of-memory.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch154: kvm-net-socket-prepare-to-cleanup-net_init_socket.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch155: kvm-net-socket-move-fd-type-checking-to-its-own-function.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch156: kvm-net-socket-remove-net_init_socket.patch -# For bz#2215819 - Migration test failed while guest with PCIe devices -Patch157: kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch158: kvm-util-iov-Make-qiov_slice-public.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch159: 
kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch160: kvm-util-iov-Remove-qemu_iovec_init_extended.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch161: kvm-iotests-iov-padding-New-test.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch162: kvm-block-Fix-pad_request-s-request-restriction.patch -# For RHEL-573 - [mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature" -Patch163: kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch -# For bz#2040509 - [RFE]:Add support for changing "tx_queue_size" to a setable value -Patch164: kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch -# For bz#2223691 - [machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2 -Patch165: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch -# For bz#2141965 - [TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region -Patch166: kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch167: kvm-block-blkio-enable-the-completion-eventfd.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch168: kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver 
virtio-blk-vhost-vdpa -Patch169: kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch170: kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch171: kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch172: kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch173: kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch174: kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch175: kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch176: kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch177: kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch178: kvm-i386-sev-Update-checks-and-information-related-to-re.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch179: kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch180: kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch -# For 
bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch181: kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch182: kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch183: kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch184: kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch185: kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch186: kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch187: kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch188: kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch189: kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch190: kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch191: kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch192: kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch193: kvm-vdpa-remove-net-cvq-migration-blocker.patch +Patch0016: 0016-Add-upstream-compatibility-bits.patch +Patch0017: 0017-x86-rhel-9.4.0-machine-type-compat-fix.patch +# For RHEL-34945 - [aarch64, kvm-unit-tests] all tests tagged as FAIL [qemu-kvm: GLib: g_ptr_array_add: assertion 'rarray' failed] +Patch18: 
kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch +# For RHEL-30362 - Check/fix machine type compatibility for QEMU 9.0.0 [x86_64][rhel-9.5.0] +Patch19: kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch20: kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch21: kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch22: kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch23: kvm-iotests-test-NBD-TLS-iothread.patch +# For RHEL-34621 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument +Patch24: kvm-virtio-gpu-fix-v2-migration.patch +# For RHEL-34621 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument +Patch25: kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch +# For RHEL-42411 - qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command +Patch26: kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch +# For RHEL-34618 - aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize +# For RHEL-38697 - aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize +Patch27: kvm-Revert-monitor-use-aio_co_reschedule_self.patch +# For RHEL-34618 - aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize +# For RHEL-38697 - aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize +Patch28: 
kvm-aio-warn-about-iohandler_ctx-special-casing.patch +# For RHEL-36159 - qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest installation with luks and iothread-vq-mapping +Patch29: kvm-block-crypto-create-ciphers-on-demand.patch +# For RHEL-36159 - qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest installation with luks and iothread-vq-mapping +Patch30: kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch31: kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch32: kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch33: kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch34: kvm-block-Parse-filenames-only-when-explicitly-requested.patch +# For RHEL-40708 - [RHEL9.5.0][virtio_fs][s390x] after hot-unplug the vhost-user-fs-ccw device, the device is failed to hot-plug again +Patch35: kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch +# For RHEL-39936 - ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256) on FUJITSU +Patch36: kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch37: kvm-introduce-pc_rhel_9_5_compat.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch38: kvm-target-i386-add-guest-phys-bits-cpu-property.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch39: kvm-kvm-add-support-for-guest-physical-bits.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch40: 
kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch41: kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch42: kvm-target-i386-Add-new-CPU-model-SierraForest.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch43: kvm-target-i386-Export-RFDS-bit-to-guests.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch44: kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch45: kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch46: kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch47: kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch48: kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch49: kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch50: kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch51: kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch52: kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch53: kvm-linux-headers-update-to-current-kvm-next.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch54: kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch55: 
kvm-KVM-track-whether-guest-state-is-encrypted.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch56: kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch57: kvm-target-i386-introduce-x86-confidential-guest.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch58: kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch59: kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch60: kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch61: kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch62: kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch63: kvm-kvm-Introduce-support-for-memory_attributes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch64: kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch65: kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch66: kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch67: kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch68: kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch69: kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch70: 
kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch71: kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch72: kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch73: kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch74: kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch75: kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch76: kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch77: kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch78: kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch79: kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch80: kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch81: kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch82: kvm-hw-i386-split-x86.c-in-multiple-parts.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch83: kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch84: kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch85: 
kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch86: kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch87: kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch88: kvm-linux-headers-Update-to-current-kvm-next.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch89: kvm-update-linux-headers-import-linux-kvm_para.h-header.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch90: kvm-machine-allow-early-use-of-machine_require_guest_mem.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch91: kvm-i386-sev-Replace-error_report-with-error_setg.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch92: kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch93: kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch94: kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch95: kvm-i386-sev-Introduce-sev-snp-guest-object.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch96: kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch97: kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch98: kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch99: kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch100: 
kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch101: kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch102: kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch103: kvm-i386-sev-Add-the-SNP-launch-start-context.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch104: kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch105: kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch106: kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch107: kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch108: kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch109: kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch110: kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch111: kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch112: kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch113: kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch114: kvm-i386-sev-Extract-build_kernel_loader_hashes.patch +# For RHEL-39544 - [QEMU] Add support for AMD 
SEV-SNP to Qemu +Patch115: kvm-i386-sev-Reorder-struct-declarations.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch116: kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch117: kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch118: kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch119: kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch120: kvm-i386-sev-fix-unreachable-code-coverity-issue.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch121: kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch122: kvm-i386-sev-Return-when-sev_common-is-null.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch123: kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch124: kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch125: kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch126: kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch127: kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch +# For RHEL-50336 - Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices +Patch128: kvm-virtio-rng-block-max-bytes-0.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch129: kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch 
+# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch130: kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch131: kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch132: kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch133: kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch134: kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch135: kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch136: kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch +# For RHEL-52250 - fsfreeze hooks break on the systems first restorecon +Patch137: kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch138: kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch %if %{have_clang} BuildRequires: clang @@ -558,6 +444,7 @@ BuildRequires: gcc BuildRequires: meson >= %{meson_version} BuildRequires: ninja-build BuildRequires: zlib-devel +BuildRequires: libzstd-devel BuildRequires: glib2-devel BuildRequires: gnutls-devel 
BuildRequires: cyrus-sasl-devel @@ -711,8 +598,8 @@ Summary: %{name} documentation %package -n qemu-pr-helper Summary: qemu-pr-helper utility for %{name} %description -n qemu-pr-helper -This package provides the qemu-pr-helper utility that is required for certain -SCSI features. +This package provides the qemu-pr-helper utility that is required for certain +SCSI features. %package -n qemu-img @@ -851,7 +738,7 @@ Summary: QEMU usbredir support Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} Requires: usbredir >= 0.7.1 Provides: %{name}-hw-usbredir -Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} +Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} %description device-usb-redirect This package provides usbredir support. @@ -915,6 +802,7 @@ ulimit -n 10240 --disable-debug-tcg \\\ --disable-dmg \\\ --disable-docs \\\ + --disable-download \\\ --disable-dsound \\\ --disable-fdt \\\ --disable-fuse \\\ @@ -927,7 +815,6 @@ ulimit -n 10240 --disable-gtk \\\ --disable-guest-agent \\\ --disable-guest-agent-msi \\\ - --disable-hax \\\ --disable-hvf \\\ --disable-iconv \\\ --disable-jack \\\ @@ -964,6 +851,7 @@ ulimit -n 10240 --disable-pa \\\ --disable-parallels \\\ --disable-pie \\\ + --disable-plugins \\\ --disable-pvrdma \\\ --disable-qcow1 \\\ --disable-qed \\\ @@ -1018,7 +906,6 @@ ulimit -n 10240 --disable-xen-pci-passthrough \\\ --disable-xkbcommon \\\ --disable-zstd \\\ - --with-git-submodules=ignore \\\ --without-default-devices @@ -1039,10 +926,8 @@ run_configure() { --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ - --meson="%{__meson}" \ --enable-trace-backends=dtrace \ --with-coroutine=ucontext \ - --with-git=git \ --tls-priority=@QEMU,SYSTEM \ %{disable_everything} \ --with-devices-%{kvm_target}=%{kvm_target}-rh-devices \ @@ -1133,6 +1018,7 @@ run_configure \ --enable-werror \ %endif --enable-xkbcommon \ + --enable-zstd \ %if %{have_safe_stack} --enable-safe-stack \ %endif @@ 
-1170,7 +1056,7 @@ run_configure \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ trace/trace-events-all qemu-kvm-simpletrace.stp -cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm +cp -a qemu-system-%{kvm_target} qemu-kvm %ifarch s390x # Copy the built new images into place for "make check": @@ -1256,7 +1142,7 @@ popd mkdir -p %{buildroot}%{_datadir}/systemtap/tapset -install -m 0755 %{qemu_kvm_build}/%{kvm_target}-softmmu/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm +install -m 0755 %{qemu_kvm_build}/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm install -m 0644 %{qemu_kvm_build}/qemu-kvm.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-log.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-simpletrace.stp %{buildroot}%{_datadir}/systemtap/tapset/ @@ -1283,7 +1169,7 @@ mkdir -p %{buildroot}%{_datadir}/%{name}/tracetool/format install -m 0644 -t %{buildroot}%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py mkdir -p %{buildroot}%{qemudocdir} -install -p -m 0644 -t %{buildroot}%{qemudocdir} README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +install -p -m 0644 -t %{buildroot}%{qemudocdir} README.rst README.systemtap COPYING COPYING.LIB LICENSE # Rename man page pushd %{buildroot}%{_mandir}/man1/ @@ -1315,6 +1201,7 @@ rm -rf %{buildroot}%{_datadir}/%{name}/qboot.rom rm -rf %{buildroot}%{_datadir}/%{name}/s390-ccw.img rm -rf %{buildroot}%{_datadir}/%{name}/s390-netboot.img rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware.img +rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware64.img rm -rf %{buildroot}%{_datadir}/%{name}/canyonlands.dtb rm -rf %{buildroot}%{_datadir}/%{name}/u-boot-sam460-20100605.bin @@ -1545,6 +1432,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp 
%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp %{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf +%{_datadir}/systemtap/tapset/qemu-img*.stp +%{_datadir}/systemtap/tapset/qemu-io*.stp +%{_datadir}/systemtap/tapset/qemu-nbd*.stp +%{_datadir}/systemtap/tapset/qemu-storage-daemon*.stp %ifarch x86_64 %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so @@ -1607,9 +1498,492 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog -* Fri Nov 3 2023 Arkady L. Shane - 8.0.0-13.inferit +* Sat Oct 12 2024 Arkady L. Shane - 17:9.0.0-10.inferit - Added host_cdrom driver to whitelist (INF-741) +* Mon Sep 02 2024 Miroslav Rezanina - 9.0.0-10 +- kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch [RHEL-52617] +- Resolves: RHEL-52617 + (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5]) + +* Mon Aug 26 2024 Miroslav Rezanina - 9.0.0-9 +- kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch [RHEL-52250] +- Resolves: RHEL-52250 + (fsfreeze hooks break on the systems first restorecon) + +* Wed Aug 14 2024 Miroslav Rezanina - 9.0.0-8 +- kvm-introduce-pc_rhel_9_5_compat.patch [RHEL-39544] +- kvm-target-i386-add-guest-phys-bits-cpu-property.patch [RHEL-39544] +- kvm-kvm-add-support-for-guest-physical-bits.patch [RHEL-39544] +- kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch [RHEL-39544] +- kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch [RHEL-39544] +- kvm-target-i386-Add-new-CPU-model-SierraForest.patch [RHEL-39544] +- kvm-target-i386-Export-RFDS-bit-to-guests.patch [RHEL-39544] +- kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch [RHEL-39544] +- kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch [RHEL-39544] +- kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch [RHEL-39544] +- kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch [RHEL-39544] +- 
kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544] +- kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch [RHEL-39544] +- kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544] +- kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch [RHEL-39544] +- kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch [RHEL-39544] +- kvm-linux-headers-update-to-current-kvm-next.patch [RHEL-39544] +- kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch [RHEL-39544] +- kvm-KVM-track-whether-guest-state-is-encrypted.patch [RHEL-39544] +- kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch [RHEL-39544] +- kvm-target-i386-introduce-x86-confidential-guest.patch [RHEL-39544] +- kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch [RHEL-39544] +- kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch [RHEL-39544] +- kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch [RHEL-39544] +- kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch [RHEL-39544] +- kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch [RHEL-39544] +- kvm-kvm-Introduce-support-for-memory_attributes.patch [RHEL-39544] +- kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch [RHEL-39544] +- kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch [RHEL-39544] +- kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch [RHEL-39544] +- kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch [RHEL-39544] +- kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch [RHEL-39544] +- kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch [RHEL-39544] +- kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch [RHEL-39544] +- kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch [RHEL-39544] +- kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch [RHEL-39544] +- kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch [RHEL-39544] +- 
kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch [RHEL-39544] +- kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch [RHEL-39544] +- kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch [RHEL-39544] +- kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch [RHEL-39544] +- kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch [RHEL-39544] +- kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch [RHEL-39544] +- kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch [RHEL-39544] +- kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch [RHEL-39544] +- kvm-hw-i386-split-x86.c-in-multiple-parts.patch [RHEL-39544] +- kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch [RHEL-39544] +- kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch [RHEL-39544] +- kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch [RHEL-39544] +- kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch [RHEL-39544] +- kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch [RHEL-39544] +- kvm-linux-headers-Update-to-current-kvm-next.patch [RHEL-39544] +- kvm-update-linux-headers-import-linux-kvm_para.h-header.patch [RHEL-39544] +- kvm-machine-allow-early-use-of-machine_require_guest_mem.patch [RHEL-39544] +- kvm-i386-sev-Replace-error_report-with-error_setg.patch [RHEL-39544] +- kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch [RHEL-39544] +- kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch [RHEL-39544] +- kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch [RHEL-39544] +- kvm-i386-sev-Introduce-sev-snp-guest-object.patch [RHEL-39544] +- kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch [RHEL-39544] +- kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch [RHEL-39544] +- kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch [RHEL-39544] +- kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch [RHEL-39544] +- 
kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch [RHEL-39544] +- kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch [RHEL-39544] +- kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch [RHEL-39544] +- kvm-i386-sev-Add-the-SNP-launch-start-context.patch [RHEL-39544] +- kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch [RHEL-39544] +- kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch [RHEL-39544] +- kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch [RHEL-39544] +- kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch [RHEL-39544] +- kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch [RHEL-39544] +- kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch [RHEL-39544] +- kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch [RHEL-39544] +- kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch [RHEL-39544] +- kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch [RHEL-39544] +- kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch [RHEL-39544] +- kvm-i386-sev-Extract-build_kernel_loader_hashes.patch [RHEL-39544] +- kvm-i386-sev-Reorder-struct-declarations.patch [RHEL-39544] +- kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch [RHEL-39544] +- kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch [RHEL-39544] +- kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch [RHEL-39544] +- kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch [RHEL-39544] +- kvm-i386-sev-fix-unreachable-code-coverity-issue.patch [RHEL-39544] +- kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch [RHEL-39544] +- kvm-i386-sev-Return-when-sev_common-is-null.patch [RHEL-39544] +- kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch [RHEL-39544] +- kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch [RHEL-39544] +- kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch [RHEL-39544] +- 
kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch [RHEL-39544] +- kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch [RHEL-39544] +- kvm-virtio-rng-block-max-bytes-0.patch [RHEL-50336] +- kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch [RHEL-50000] +- kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch [RHEL-50000] +- kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch [RHEL-50000] +- kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch [RHEL-50000] +- kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch [RHEL-52617] +- Resolves: RHEL-39544 + ([QEMU] Add support for AMD SEV-SNP to Qemu) +- Resolves: RHEL-50336 + (Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices) +- Resolves: RHEL-50000 + (scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert) +- Resolves: RHEL-52617 + (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5]) + +* Mon Jul 15 2024 Miroslav Rezanina - 9.0.0-7 +- kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch [RHEL-40708] +- kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch [RHEL-39936] +- Resolves: RHEL-40708 + ([RHEL9.5.0][virtio_fs][s390x] after hot-unplug the vhost-user-fs-ccw device, the device is failed to hot-plug again ) +- Resolves: RHEL-39936 + (ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256) on FUJITSU) + +* Thu Jul 04 2024 Miroslav Rezanina - 9.0.0-6 +- kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch [RHEL-35611] +- kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch [RHEL-35611] +- kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch [RHEL-35611] +- 
kvm-block-Parse-filenames-only-when-explicitly-requested.patch [RHEL-35611] +- Resolves: RHEL-35611 + (CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5]) + +* Tue Jun 25 2024 Miroslav Rezanina - 9.0.0-5 +- kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch [RHEL-42411] +- kvm-Revert-monitor-use-aio_co_reschedule_self.patch [RHEL-34618 RHEL-38697] +- kvm-aio-warn-about-iohandler_ctx-special-casing.patch [RHEL-34618 RHEL-38697] +- kvm-block-crypto-create-ciphers-on-demand.patch [RHEL-36159] +- kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch [RHEL-36159] +- Resolves: RHEL-42411 + (qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command) +- Resolves: RHEL-34618 + (aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize) +- Resolves: RHEL-38697 + (aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize) +- Resolves: RHEL-36159 + (qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest installation with luks and iothread-vq-mapping) + +* Mon Jun 17 2024 Miroslav Rezanina - 9.0.0-4 +- kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch [RHEL-33440] +- kvm-iotests-test-NBD-TLS-iothread.patch [RHEL-33440] +- kvm-virtio-gpu-fix-v2-migration.patch [RHEL-34621] +- kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch [RHEL-34621] +- Resolves: RHEL-33440 + (Qemu hang when quit dst vm after storage migration(nbd+tls)) +- Resolves: RHEL-34621 + ([RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument) + +* Tue May 21 2024 Miroslav Rezanina - 9.0.0-3 +- kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch [RHEL-33440] +- kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch [RHEL-33440] +- Resolves: RHEL-33440 + (Qemu hang when quit dst vm after storage migration(nbd+tls)) + +* Tue May 07 2024 
Miroslav Rezanina - 9.0.0-2 +- kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch [RHEL-34945] +- kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch [RHEL-30362] +- Resolves: RHEL-34945 + ([aarch64, kvm-unit-tests] all tests tagged as FAIL [qemu-kvm: GLib: g_ptr_array_add: assertion 'rarray' failed] ) +- Resolves: RHEL-30362 + (Check/fix machine type compatibility for QEMU 9.0.0 [x86_64][rhel-9.5.0]) + +* Wed Apr 24 2024 Miroslav Rezanina - 9.0.0-1 +- Rebase to QEMU 9.0.0 [RHEL-28073] +- Resolves: RHEL-28073 + (Rebase qemu-kvm to QEMU 9.0.0 for RHEL 9.5) + +* Tue Mar 26 2024 Miroslav Rezanina - 8.2.0-11 +- kvm-coroutine-cap-per-thread-local-pool-size.patch [RHEL-28947] +- kvm-coroutine-reserve-5-000-mappings.patch [RHEL-28947] +- Resolves: RHEL-28947 + (Qemu crashing with "failed to set up stack guard page: Cannot allocate memory") + +* Thu Mar 21 2024 Miroslav Rezanina - 8.2.0-10 +- kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch [RHEL-24614] +- kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch [RHEL-24614] +- kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch [RHEL-24614] +- Resolves: RHEL-24614 + ([RHEL9][chardev] qemu hit core dump while using TLS server from host to guest) + +* Wed Mar 20 2024 Miroslav Rezanina - 8.2.0-9 +- kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch [RHEL-28125] +- kvm-nbd-server-Fix-race-in-draining-the-export.patch [RHEL-28125] +- kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch [RHEL-28125] +- kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch [RHEL-21705] +- Resolves: RHEL-28125 + (RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete.) 
+- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + +* Mon Mar 18 2024 Miroslav Rezanina - 8.2.0-8 +- kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch [RHEL-19629] +- kvm-ui-clipboard-add-asserts-for-update-and-request.patch [RHEL-19629] +- kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch [RHEL-21705] +- kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch [RHEL-21705] +- kvm-Implement-SMBIOS-type-9-v2.6.patch [RHEL-21705] +- kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch [RHEL-21705] +- kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch [RHEL-21705] +- kvm-smbios-get-rid-of-smbios_legacy-global.patch [RHEL-21705] +- kvm-smbios-avoid-mangling-user-provided-tables.patch [RHEL-21705] +- kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch [RHEL-21705] +- kvm-smbios-add-smbios_add_usr_blob_size-helper.patch [RHEL-21705] +- kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch [RHEL-21705] +- kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch [RHEL-21705] +- kvm-smbios-handle-errors-consistently.patch [RHEL-21705] +- kvm-smbios-get-rid-of-global-smbios_ep_type.patch [RHEL-21705] +- kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch [RHEL-21705] +- kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch [RHEL-21705] +- kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch [RHEL-21705] +- kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch [RHEL-21705] +- kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch [RHEL-21705] +- Resolves: RHEL-19629 + (CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer dereference in qemu_clipboard_request() [rhel-9]) +- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + +* Fri Mar 08 2024 Miroslav Rezanina - 8.2.0-7 +- kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch [RHEL-26049] +- 
kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch [RHEL-24614] +- Resolves: RHEL-26049 + (When max vcpu is greater than or equal to 246, qemu unable to init event notifier) +- Resolves: RHEL-24614 + ([RHEL9][chardev][s390x] qemu hit core dump while using TLS server from host to guest) + +* Mon Feb 19 2024 Miroslav Rezanina - 8.2.0-6 +- kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch [RHEL-3934] +- kvm-virtio-Re-enable-notifications-after-drain.patch [RHEL-3934] +- kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch [RHEL-3934] +- kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch [RHEL-15394] +- kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch [RHEL-24988] +- Resolves: RHEL-3934 + ([qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled ) +- Resolves: RHEL-15394 + (virtio-blk: qemu hang on "no response on QMP query-status" when write data to disk without enough space) +- Resolves: RHEL-24988 + (Mark virt-rhel9.{0,2}.0 machine types as deprecated) + +* Mon Feb 12 2024 Miroslav Rezanina - 8.2.0-5 +- kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch [RHEL-20341] +- kvm-memory-device-reintroduce-memory-region-size-check.patch [RHEL-20341] +- kvm-block-backend-Allow-concurrent-context-changes.patch [RHEL-24593] +- kvm-scsi-Await-request-purging.patch [RHEL-24593] +- kvm-string-output-visitor-show-structs-as-omitted.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-string-output-visitor-Fix-pseudo-struct-handling.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-qdev-properties-alias-all-object-class-properties.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-add-filter_qmp_generated_node_ids.patch 
[RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-always-set-ioeventfd-during-startup.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-mem-default-enable-dynamic-memslots.patch [RHEL-24045] +- Resolves: RHEL-20341 + (memory-device size alignment check invalid in QEMU 8.2) +- Resolves: RHEL-24593 + (qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk) +- Resolves: RHEL-17369 + ([nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed.) 
+- Resolves: RHEL-20764 + ([qemu-kvm] Enable qemu multiqueue block layer support) +- Resolves: RHEL-7356 + ([qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9]) +- Resolves: RHEL-24045 + (QEMU: default-enable dynamically using multiple memslots for virtio-mem) + +* Tue Jan 30 2024 Miroslav Rezanina - 8.2.0-4 +- kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch [RHEL-21293] +- Resolves: RHEL-21293 + ([emulated igb] Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument) + +* Wed Jan 24 2024 Miroslav Rezanina - 8.2.0-3 +- kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch [RHEL-19738] +- kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Switch-to-dma_map-unmap-API.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-Move-giommu_list-in-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-space-field-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-per-container-device-list-in-bas.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Convert-functions-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-vrdl_list-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-listener-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-iova_ranges-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Implement-attach-detach_device.patch [RHEL-19302 
RHEL-21057] +- kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Introduce-the-iommufd-object.patch [RHEL-19302 RHEL-21057] +- kvm-util-char_dev-Add-open_cdev.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-return-early-if-space-isn-t-empty.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Implement-the-iommufd-backend.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch [RHEL-19302 RHEL-21057] +- kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch 
[RHEL-19302 RHEL-21057] +- kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch [RHEL-19302 RHEL-21057] +- kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch [RHEL-19302 RHEL-21057] +- kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch [RHEL-19302 RHEL-21057] +- kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Replace-basename-with-g_path_get_base.patch [RHEL-19302 RHEL-21057] +- kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch [RHEL-19302 RHEL-21057] +- 
kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Remove-mutex.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-object-on-aarch64.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-on-s390x.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-on-x86_64.patch [RHEL-19302 RHEL-21057] +- kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch [RHEL-18212] +- kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch [RHEL-15965] +- kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch [RHEL-15965] +- kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch [RHEL-15965] +- kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch [RHEL-15965] +- kvm-virtio-blk-add-lock-to-protect-s-rq.patch [RHEL-15965] +- kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch [RHEL-15965] +- kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch [RHEL-15965] +- kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch [RHEL-15965] +- kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch [RHEL-15965] +- kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch [RHEL-15965] +- kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch [RHEL-15965] +- kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch [RHEL-15965] +- kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch [RHEL-15965] +- kvm-tests-remove-aio_context_acquire-tests.patch [RHEL-15965] +- kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch [RHEL-15965] +- kvm-graph-lock-remove-AioContext-locking.patch [RHEL-15965] +- kvm-block-remove-AioContext-locking.patch [RHEL-15965] +- kvm-block-remove-bdrv_co_lock.patch [RHEL-15965] +- kvm-scsi-remove-AioContext-locking.patch [RHEL-15965] +- 
kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch [RHEL-15965] +- kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch [RHEL-15965] +- kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch [RHEL-15965] +- kvm-scsi-remove-outdated-AioContext-lock-comment.patch [RHEL-15965] +- kvm-job-remove-outdated-AioContext-locking-comments.patch [RHEL-15965] +- kvm-block-remove-outdated-AioContext-locking-comments.patch [RHEL-15965] +- kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch [RHEL-15965] +- kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch [RHEL-21169] +- kvm-s390x-pci-refresh-fh-before-disabling-aif.patch [RHEL-21169] +- kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch [RHEL-21169] +- kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch [RHEL-21570] +- kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch [RHEL-7565] +- kvm-spec-Enable-zstd.patch [RHEL-7361] +- Resolves: RHEL-19738 + (Enable properties allowing to disable high memory regions) +- Resolves: RHEL-19302 + (NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend) +- Resolves: RHEL-21057 + (Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6) +- Resolves: RHEL-18212 + ([RHEL9][Secure-execution][s390x] The error message is not clear when boot up a SE guest with wrong encryption) +- Resolves: RHEL-15965 + ( [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize)) +- Resolves: RHEL-21169 + ([s390x] VM fails to start with ISM passed through QEMU 8.2) +- Resolves: RHEL-21570 + (Critical performance degradation for input devices in virtio vnc session) +- Resolves: RHEL-7565 + (qemu crashed when migrate guest with blob resources enabled) +- Resolves: RHEL-7361 + ([qemu-kvm] Enable zstd support for qcow2 files) + +* Mon Jan 08 2024 Miroslav Rezanina - 8.2.0-2 +- kvm-hw-arm-virt-Fix-compats.patch [RHEL-17168] +- Resolves: RHEL-17168 + (Introduce virt-rhel9.4.0 arm-virt machine type [aarch64]) + +* Tue Jan 02 2024 Miroslav 
Rezanina - 8.2.0-1 +- Rebase to QEMU 8.2.0 [RHEL-14111] +- Fix machine type compatibility [RHEL-17067 RHEL-17068] +- Add 9.4.0 machine type [RHEL-17168 RHEL-19117 RHEL-19119] +- Resolves: RHEL-14111 + (Rebase qemu-kvm to QEMU 8.2.0) +- Resolves: RHEL-17067 + (Check/fix machine type compatibility for qemu-kvm 8.2.0 [s390x]) +- Resolves: RHEL-17068 + (Check/fix machine type compatibility for qemu-kvm 8.2.0 [x86_64]) +- Resolves: RHEL-17168 + (Introduce virt-rhel9.4.0 arm-virt machine type [aarch64]) +- Resolves: RHEL-19117 + (Introduce virt-rhel9.4.0 arm-virt machine type [x86_64]) +- Resolves: RHEL-19119 + (Introduce virt-rhel9.4.0 arm-virt machine type [s390x]) + +* Thu Nov 30 2023 Miroslav Rezanina - 8.1.0-5 +- kvm-Preparation-for-using-allow-rpcs-list-in-guest-agent.patch [RHEL-955] +- kvm-Use-allow-rpcs-instead-of-block-rpcs-in-guest-agent..patch [RHEL-955] +- Resolves: RHEL-955 + (Use allow-rpcs instead of block-rpcs in guest-agent.service) + +* Mon Nov 13 2023 Miroslav Rezanina - 8.1.0-4 +- kvm-hw-scsi-scsi-disk-Disallow-block-sizes-smaller-than-.patch [RHEL-2828] +- kvm-Enable-igb-on-x86_64.patch [RHEL-1308] +- kvm-host-include-generic-host-atomic128-Fix-compilation-.patch [RHEL-12991] +- kvm-Enable-qemu-kvm-device-usb-redirec-for-aarch64.patch [RHEL-7561] +- Resolves: RHEL-2828 + (CVE-2023-42467 qemu-kvm: qemu: denial of service due to division by zero [rhel-9]) +- Resolves: RHEL-1308 + ([RFE] iGB: Add an emulated SR-IOV network card) +- Resolves: RHEL-12991 + (qemu-kvm fails to build on s390x with clang-17) +- Resolves: RHEL-7561 + (Missing the rpm package qemu-kvm-device-usb-redirect on Arm64 platform) + +* Mon Oct 16 2023 Miroslav Rezanina - 8.1.0-3 +- kvm-migration-Fix-race-that-dest-preempt-thread-close-to.patch [RHEL-11219] +- kvm-migration-Fix-possible-race-when-setting-rp_state.er.patch [RHEL-11219] +- kvm-migration-Fix-possible-races-when-shutting-down-the-.patch [RHEL-11219] +- kvm-migration-Fix-possible-race-when-shutting-down-to_ds.patch 
[RHEL-11219] +- kvm-migration-Remove-redundant-cleanup-of-postcopy_qemuf.patch [RHEL-11219] +- kvm-migration-Consolidate-return-path-closing-code.patch [RHEL-11219] +- kvm-migration-Replace-the-return-path-retry-logic.patch [RHEL-11219] +- kvm-migration-Move-return-path-cleanup-to-main-migration.patch [RHEL-11219] +- kvm-file-posix-Clear-bs-bl.zoned-on-error.patch [RHEL-7360] +- kvm-file-posix-Check-bs-bl.zoned-for-zone-info.patch [RHEL-7360] +- kvm-file-posix-Fix-zone-update-in-I-O-error-path.patch [RHEL-7360] +- kvm-file-posix-Simplify-raw_co_prw-s-out-zone-code.patch [RHEL-7360] +- kvm-tests-file-io-error-New-test.patch [RHEL-7360] +- Resolves: RHEL-11219 + (migration tests failing for RHEL 9.4 sometimes) +- Resolves: RHEL-7360 + (Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk) + +* Mon Oct 02 2023 Miroslav Rezanina - 8.1.0-2 +- kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch [RHEL-832] +- Resolves: RHEL-832 + (qemu-kvm crashed when migrating guest with failover vf) + +* Mon Sep 04 2023 Miroslav Rezanina - 8.1.0-1 +- Rebase to QEMU 8.1 [RHEL-870] +- Resolves: RHEL-870 + (Rebase qemu-kvm to QEMU 8.1.0) + * Thu Aug 24 2023 Miroslav Rezanina - 8.0.0-13 - kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch [RHEL-923] - kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch [RHEL-923] @@ -1904,9 +2278,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#2180898 (Rebase to QEMU 8.0.0 for RHEL 9.3.0) -* Fri Apr 14 2023 MSVSphere Packaging Team - 7.2.0-14 -- Rebuilt for MSVSphere 9.2 beta - * Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-14 - Rebuild for 9.2 release - Resolves: bz#2173590