diff --git a/.gitignore b/.gitignore index 459c79b..7dc73be 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/qemu-7.2.0.tar.xz +SOURCES/qemu-8.0.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata index ed38614..a158c44 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -634a3e4b381cbf13085eb1568accb85cbd9d89c4 SOURCES/qemu-7.2.0.tar.xz +17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz diff --git a/SOURCES/0004-Initial-redhat-build.patch b/SOURCES/0004-Initial-redhat-build.patch index 0f9cc55..612633e 100644 --- a/SOURCES/0004-Initial-redhat-build.patch +++ b/SOURCES/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From ccc4a5bdc8c2f27678312364a7c12aeafd009bb6 Mon Sep 17 00:00:00 2001 +From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-7.1.0-7.el9 +This rebase is based on qemu-kvm-7.2.0-14.el9 Signed-off-by: Miroslav Rezanina -- @@ -66,6 +66,16 @@ Rebase changes (7.2.0): - Fix SRPM name generation to work on Fedora 37 - Switch back to system meson +Rebase changes (8.0.0-rc1): +- use enable-dtrace-backands instead of enable-dtrace-backend +- Removed qemu virtiofsd bits + +Rebase changes (8.0.0-rc2): +- test/check-block.sh removed (upstream) + +Rebase changes (8.0.0-rc3): +- Add new --disable-* options for configure + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -162,16 +172,18 @@ Merged patches (7.2.0 rc4): - 8c6834feb6 Remove opengl display device subpackages (C9S MR 124) - 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124) -Signed-off-by: Miroslav Rezanina +Merged patches (8.0.0-rc1): +- 7754f6ba78 Minor packaging fixes +- 401af56187 spec: Disable VDUSE -fix +Signed-off-by: Miroslav Rezanina --- .distro/Makefile | 100 + .distro/Makefile.common | 41 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4315 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + .distro/scripts/process-patches.sh | 4 + @@ -180,9 +192,8 @@ fix scripts/qemu-guest-agent/fsfreeze-hook | 2 +- scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + - tests/check-block.sh | 2 + ui/vnc-auth-sasl.c | 2 +- - 16 files changed, 4573 insertions(+), 4 deletions(-) + 15 files changed, 4784 insertions(+), 4 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests @@ -271,19 +282,6 @@ index 0000000000..c04abf9449 +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe 
qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -diff --git a/tests/check-block.sh b/tests/check-block.sh -index 5de2c1ba0b..6af743f441 100755 ---- a/tests/check-block.sh -+++ b/tests/check-block.sh -@@ -22,6 +22,8 @@ if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then - skip "No qemu-system binary available ==> Not running the qemu-iotests." - fi - -+exit 0 -+ - cd tests/qemu-iotests - - # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c index 47fdae5b21..2a950caa2a 100644 --- a/ui/vnc-auth-sasl.c @@ -298,5 +296,5 @@ index 47fdae5b21..2a950caa2a 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.31.1 +2.39.1 diff --git a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch index 767389f..14dd3f9 100644 --- a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 90366cd2ead5a5301aaceed56477d2e6d9f1b3cd Mon Sep 17 00:00:00 2001 +From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ -32,6 +32,11 @@ Rebase notes (7.1.0 rc3): Rebase notes (7.2.0 rc20): - Removed disabling a15mpcore.c as no longer needed +Rebase notes (8.0.0-rc1): +- Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9 +- Inlude qemu/error-report.h in hw/display/cirrus_vga.c +- Change virtiofsd dependency version + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -63,7 +68,7 @@ Merged patches (7.1.0 rc0): hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 3 +- - hw/display/cirrus_vga.c | 5 +- + hw/display/cirrus_vga.c | 7 +- hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 
+ hw/net/e1000.c | 2 + @@ -73,7 +78,7 @@ Merged patches (7.1.0 rc0): target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 19 files changed, 283 insertions(+), 13 deletions(-) + 19 files changed, 285 insertions(+), 13 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -212,7 +217,7 @@ index 0000000000..69a799adbd +CONFIG_VHOST_USER_FS=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..10cb0a14e0 +index 0000000000..668b2d0e18 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak @@ -0,0 +1,109 @@ @@ -226,7 +231,7 @@ index 0000000000..10cb0a14e0 +CONFIG_ACPI_SMBUS=y +CONFIG_ACPI_VMGENID=y +CONFIG_ACPI_X86=y -+CONFIG_ACPI_X86_ICH=y ++CONFIG_ACPI_ICH9=y +CONFIG_AHCI=y +CONFIG_APIC=y +CONFIG_APM=y @@ -326,10 +331,10 @@ index 0000000000..10cb0a14e0 +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index 92f9f6e000..c5e94c997c 100644 +index b545ba0e4f..a41a16cba7 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build -@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) +@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) @@ -339,7 +344,7 @@ index 92f9f6e000..c5e94c997c 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 64ae4a6899..9b8e782c19 100644 +index d7cc4d3ec1..12d0a60905 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ 
-49,6 +49,8 @@ @@ -367,7 +372,7 @@ index 64ae4a6899..9b8e782c19 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index 9e52fee9e7..87c209a754 100644 +index e37490074f..4431e3731c 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,4 +1,5 @@ @@ -375,13 +380,29 @@ index 9e52fee9e7..87c209a754 100644 +#softmmu_ss.add(files('core.c', 'cluster.c')) +softmmu_ss.add(files('core.c')) - specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) - specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + softmmu_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 6e8c747c46..1948ebee8e 100644 +index b80f98b6c4..cbde6a8f15 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2946,7 +2946,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -36,6 +36,7 @@ + #include "qemu/module.h" + #include "qemu/units.h" + #include "qemu/log.h" ++#include "qemu/error-report.h" + #include "sysemu/reset.h" + #include "qapi/error.h" + #include "trace.h" +@@ -47,6 +48,7 @@ + #include "qom/object.h" + #include "ui/console.h" + ++ + /* + * TODO: + * - destination write mask support not complete (bits 5..7) +@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -394,10 +415,10 @@ index 6e8c747c46..1948ebee8e 100644 * Also accept 8 MB/16 MB for backward compatibility. 
*/ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 267dbf37db..87fcda4062 100644 +index 41d60921e3..a4af45b4e8 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -199,7 +199,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -407,7 +428,7 @@ index 267dbf37db..87fcda4062 100644 } static const TypeInfo piix3_ide_info = { -@@ -222,6 +223,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -430,10 +451,10 @@ index b92b63bedc..3b6235dde6 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index e26e0a64c1..41492fae79 100644 +index 23d660619f..b75c9aa799 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1824,6 +1824,7 @@ static const E1000Info e1000_devices[] = { +@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -441,7 +462,7 @@ index e26e0a64c1..41492fae79 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1836,6 +1837,7 @@ static const E1000Info e1000_devices[] = { +@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -467,7 +488,7 @@ index 8a4861f45a..fcb5dfe792 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index 793df42e21..cd3c305471 100644 +index 599dc24f0d..905a994c3a 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -52,7 +52,7 @@ softmmu_ss.add(when: 
'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade @@ -480,10 +501,10 @@ index 793df42e21..cd3c305471 100644 endif diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 9a2cef7d05..a528ff9a3d 100644 +index df0c45e523..c154a4dcf2 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -151,6 +151,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) /* CPU models. These are not needed for the AArch64 linux-user build. */ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) @@ -491,7 +512,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -504,6 +505,7 @@ static void cortex_a9_initfn(Object *obj) +@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -499,7 +520,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -528,6 +530,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; @@ -507,7 +528,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -576,6 +579,7 @@ static void cortex_a7_initfn(Object *obj) +@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } @@ -515,7 +536,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_a15_initfn(Object *obj) { -@@ -624,6 +628,7 @@ static void cortex_a15_initfn(Object *obj) +@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ 
-523,7 +544,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1065,6 +1070,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -531,7 +552,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #ifndef TARGET_AARCH64 /* -@@ -1132,6 +1138,7 @@ static void arm_max_initfn(Object *obj) +@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -539,7 +560,7 @@ index 9a2cef7d05..a528ff9a3d 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1147,7 +1154,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -549,7 +570,7 @@ index 9a2cef7d05..a528ff9a3d 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1178,6 +1187,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -620,7 +641,7 @@ index 912b037c63..cd3ff700ac 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index d8a141a023..d086b1c39c 100644 +index 63981bf36b..87a4480c05 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c @@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -653,5 
+674,5 @@ index 3ac7ec9acf..97da1a6424 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -- -2.31.1 +2.39.1 diff --git a/SOURCES/0006-Machine-type-related-general-changes.patch b/SOURCES/0006-Machine-type-related-general-changes.patch index fc2a89d..5dd591f 100644 --- a/SOURCES/0006-Machine-type-related-general-changes.patch +++ b/SOURCES/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 0208f38671b9de4036c0d56142a7f22e5091bae0 Mon Sep 17 00:00:00 2001 +From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -46,28 +46,33 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- 21ed34787b Addd 7.2 compat bits for RHEL 9.1 machine type +- e5c8d5d603 virtio-rng-pci: fix migration compat for vectors +- 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 222 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 46 +++++++- + hw/smbios/smbios.c | 46 ++++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 59 +++++++--- + hw/usb/hcd-xhci-pci.c | 59 ++++++--- hw/usb/hcd-xhci-pci.h | 1 + include/hw/boards.h | 31 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 14 files changed, 360 insertions(+), 23 deletions(-) + 14 files changed, 367 insertions(+), 23 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 0a81f1ad93..dbfb362a8f 100644 +index 63d2113b86..a24b9aac92 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -248,7 +248,7 @@ static bool 
vmstate_test_migrate_acpi_index(void *opaque, int version_id) +@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -77,25 +82,25 @@ index 0a81f1ad93..dbfb362a8f 100644 .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index b871350856..d633300fdc 100644 +index ac626b3bef..4a6e89c7bc 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1619,7 +1619,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, - true, SMBIOS_ENTRY_POINT_TYPE_64); + true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); - smbios_get_tables(MACHINE(vms), NULL, 0, - &smbios_tables, &smbios_tables_len, + /* build the array of physical mem area from base_memmap */ + mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index 8d34caa31d..9edec1ca05 100644 +index cd13b8b0a3..5aa567fad3 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -40,6 +40,228 @@ - #include "hw/virtio/virtio-pci.h" - #include "qom/object_interfaces.h" +@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = { + }; + const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); +/* + * RHEL only: machine types for previous major releases are deprecated @@ -111,6 +116,13 @@ index 8d34caa31d..9edec1ca05 100644 + { "arm-gicv3-common", "force-8-bit-prio", "on" }, + /* hw_compat_rhel_9_1 from hw_compat_7_0 */ + { "nvme-ns", "eui64-default", "on"}, ++ /* hw_compat_rhel_9_1 from hw_compat_7_1 */ ++ { "virtio-device", "queue_reset", "false" }, ++ /* hw_compat_rhel_9_1 bz 2155749 */ ++ { "virtio-rng-pci", "vectors", "0" }, ++ /* hw_compat_rhel_9_1 bz 2162569 */ ++ { "virtio-rng-pci-transitional", "vectors", "0" }, ++ { 
"virtio-rng-pci-non-transitional", "vectors", "0" }, +}; +const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + @@ -321,7 +333,7 @@ index 8d34caa31d..9edec1ca05 100644 + GlobalProperty hw_compat_7_1[] = { { "virtio-device", "queue_reset", "false" }, - }; + { "virtio-rng-pci", "vectors", "0" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 2a5437d803..0db2c2b2a1 100644 --- a/hw/display/vga-isa.c @@ -336,10 +348,10 @@ index 2a5437d803..0db2c2b2a1 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0ad0ed1603..0985ff67d2 100644 +index 30eedd62a3..14a794081e 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -187,6 +187,8 @@ static void pc_init1(MachineState *machine, +@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -349,10 +361,10 @@ index 0ad0ed1603..0985ff67d2 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a496bd6e74..ea582254e3 100644 +index 797ba347fd..dc0ba5f9e7 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine) +@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -362,7 +374,7 @@ index a496bd6e74..ea582254e3 100644 } diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 700b1b66b6..13693aeb4f 100644 +index 5a5aaf868d..3d473d5869 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque) @@ -385,10 +397,10 @@ index 700b1b66b6..13693aeb4f 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index b4243de735..c5ad69237e 100644 +index d2007e70fb..319eae9e9d 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c 
-@@ -57,6 +57,9 @@ static bool smbios_legacy = true; +@@ -58,6 +58,9 @@ static bool smbios_legacy = true; static bool smbios_uuid_encoded = true; /* end: legacy structures & constants for <= 2.0 machines */ @@ -398,7 +410,7 @@ index b4243de735..c5ad69237e 100644 uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -669,7 +672,7 @@ static void smbios_build_type_1_table(void) +@@ -670,7 +673,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -407,7 +419,7 @@ index b4243de735..c5ad69237e 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -977,7 +980,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -419,7 +431,7 @@ index b4243de735..c5ad69237e 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -998,11 +1004,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -579,10 +591,10 @@ index 643d4643e4..529bad9366 100644 dc->vmsd = &vmstate_xhci_pci; set_bit(DEVICE_CATEGORY_USB, dc->categories); diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h -index c193f79443..086a1feb1e 100644 +index 08f70ce97c..1be7527c1b 100644 --- a/hw/usb/hcd-xhci-pci.h +++ b/hw/usb/hcd-xhci-pci.h -@@ -39,6 +39,7 @@ typedef struct XHCIPciState { +@@ -40,6 +40,7 @@ typedef struct XHCIPciState { XHCIState xhci; OnOffAuto msi; OnOffAuto msix; @@ -591,10 +603,10 @@ index c193f79443..086a1feb1e 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index 90f1dd3aeb..2209d4e416 100644 +index 6fbbfd56c8..c5a965d27f 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -454,4 
+454,35 @@ extern const size_t hw_compat_2_2_len; +@@ -459,4 +459,35 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -647,10 +659,10 @@ index 7f3259a630..d24b3ccd32 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index c95333514e..3754eaa97d 100644 +index 8206d5405a..908a275736 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -112,6 +112,9 @@ struct PCMachineClass { +@@ -111,6 +111,9 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; @@ -661,5 +673,5 @@ index c95333514e..3754eaa97d 100644 /* RAM / address space compat: */ bool gigabyte_align; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0007-Add-aarch64-machine-types.patch b/SOURCES/0007-Add-aarch64-machine-types.patch index 06611e7..f47bbd0 100644 --- a/SOURCES/0007-Add-aarch64-machine-types.patch +++ b/SOURCES/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 8501581c99760ed8a800d0c98eeb17a4bf450366 Mon Sep 17 00:00:00 2001 +From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -26,6 +26,9 @@ Rebase notes (7.1.0 rc3): Rebase notes (7.2.0 rc0): - Disabled cortex-a35 +Rebase notes (8.0.0-rc1): +- Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -49,23 +52,27 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type +- d97cd7c513 redhat: fix virt-rhel9.2.0 compat props --- - hw/arm/virt.c | 237 
++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ + target/arm/arm-qmp-cmds.c | 2 + target/arm/cpu-qom.h | 1 + target/arm/cpu.c | 5 + target/arm/cpu.h | 2 + target/arm/cpu64.c | 16 ++- target/arm/cpu_tcg.c | 12 +- - target/arm/helper.c | 2 + tests/qtest/arm-cpu-features.c | 6 + - 9 files changed, 277 insertions(+), 12 deletions(-) + 9 files changed, 289 insertions(+), 14 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d633300fdc..dfcab40a73 100644 +index 4a6e89c7bc..1ae1654be5 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -80,6 +80,7 @@ +@@ -81,6 +81,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -73,7 +80,7 @@ index d633300fdc..dfcab40a73 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -106,7 +107,48 @@ +@@ -107,7 +108,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -123,7 +130,7 @@ index d633300fdc..dfcab40a73 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -197,15 +239,19 @@ static const int a15irqmap[] = { +@@ -204,16 +246,20 @@ static const int a15irqmap[] = { }; static const char *valid_cpus[] = { @@ -132,6 +139,7 @@ index d633300fdc..dfcab40a73 100644 ARM_CPU_TYPE_NAME("cortex-a15"), ARM_CPU_TYPE_NAME("cortex-a35"), ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a55"), +#endif /* disabled for RHEL */ ARM_CPU_TYPE_NAME("cortex-a57"), +#if 0 /* Disabled for Red Hat Enterprise Linux */ @@ -143,7 +151,7 @@ index d633300fdc..dfcab40a73 100644 ARM_CPU_TYPE_NAME("host"), ARM_CPU_TYPE_NAME("max"), }; -@@ -2290,6 +2336,7 @@ static void machvirt_init(MachineState *machine) +@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); 
} @@ -151,7 +159,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2317,6 +2364,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -159,7 +167,25 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2346,6 +2394,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) +@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) + + vms->highmem = value; + } +- ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_compact_highmem(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) + + vms->highmem_mmio = value; + } +- ++#endif /* disabled for RHEL */ + + static bool virt_get_its(Object *obj, Error **errp) + { +@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -167,7 +193,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2359,6 +2408,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -175,7 +201,7 @@ index d633300fdc..dfcab40a73 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2442,6 +2492,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -183,7 +209,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_mte(Object *obj, Error **errp) { 
VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2455,6 +2506,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -191,7 +217,7 @@ index d633300fdc..dfcab40a73 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2886,6 +2938,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2988,6 +3040,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 0 : requested_pa_size; } @@ -199,7 +225,7 @@ index d633300fdc..dfcab40a73 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3294,3 +3347,185 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -312,6 +338,7 @@ index d633300fdc..dfcab40a73 100644 + + /* High memory is enabled by default */ + vms->highmem = true; ++ vms->highmem_compact = !vmc->no_highmem_compact; + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; @@ -374,22 +401,31 @@ index d633300fdc..dfcab40a73 100644 +} +type_init(rhel_machine_init); + ++static void rhel920_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++ +static void rhel900_virt_options(MachineClass *mc) +{ + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel920_virt_options(mc); ++ + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; ++ /* Compact layout for high memory regions was introduced with 9.2.0 */ ++ 
vmc->no_highmem_compact = true; +} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) ++DEFINE_RHEL_MACHINE(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 6ec479ca2b..22b54ec510 100644 +index e1ddbea96b..81c2363a40 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -180,9 +180,17 @@ struct VirtMachineState { +@@ -187,9 +187,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -407,8 +443,28 @@ index 6ec479ca2b..22b54ec510 100644 void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); +diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c +index c8fa524002..3aa089abf3 100644 +--- a/target/arm/arm-qmp-cmds.c ++++ b/target/arm/arm-qmp-cmds.c +@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h -index 64c44cef2d..82e97249bc 100644 +index 514c22ced9..f789173451 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { @@ -420,10 +476,10 @@ index 64c44cef2d..82e97249bc 100644 void arm_cpu_register(const ARMCPUInfo *info); diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 38d066c294..a845814bfb 100644 +index 5182ed0c91..6740a8b940 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2250,8 +2250,13 @@ static void arm_cpu_instance_init(Object 
*obj) +@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -438,7 +494,7 @@ index 38d066c294..a845814bfb 100644 void arm_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 9aeed3c848..f9f504d89e 100644 +index c097cae988..829d4a2328 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -34,6 +34,8 @@ @@ -451,10 +507,10 @@ index 9aeed3c848..f9f504d89e 100644 #define EXCP_SWI 2 /* software interrupt */ #define EXCP_PREFETCH_ABORT 3 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 3d74f134f5..4b330a52b5 100644 +index 0fb07cc7b6..47459627fb 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c -@@ -36,6 +36,7 @@ +@@ -31,6 +31,7 @@ #include "hw/qdev-properties.h" #include "internals.h" @@ -462,7 +518,7 @@ index 3d74f134f5..4b330a52b5 100644 static void aarch64_a35_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -115,6 +116,7 @@ static void aarch64_a35_initfn(Object *obj) +@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj) /* These values are the same with A53/A57/A72. 
*/ define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -470,7 +526,7 @@ index 3d74f134f5..4b330a52b5 100644 void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { -@@ -735,6 +737,7 @@ static void aarch64_a57_initfn(Object *obj) +@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj) define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -478,15 +534,15 @@ index 3d74f134f5..4b330a52b5 100644 static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1033,6 +1036,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) - /* From D5.1 AArch64 PMU register summary */ - cpu->isar.reset_pmcr_el0 = 0x410c3000; +@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) + + define_neoverse_n1_cp_reginfo(cpu); } +#endif /* disabled for RHEL */ static void aarch64_host_initfn(Object *obj) { -@@ -1240,13 +1244,18 @@ static void aarch64_max_initfn(Object *obj) +@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj) } static const ARMCPUInfo aarch64_cpus[] = { @@ -498,6 +554,7 @@ index 3d74f134f5..4b330a52b5 100644 + .deprecation_note = RHEL_CPU_DEPRECATION }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, @@ -506,7 +563,7 @@ index 3d74f134f5..4b330a52b5 100644 { .name = "max", .initfn = aarch64_max_initfn }, #if defined(CONFIG_KVM) || defined(CONFIG_HVF) { .name = "host", .initfn = aarch64_host_initfn }, -@@ -1318,8 +1327,13 @@ static void aarch64_cpu_instance_init(Object *obj) +@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -521,10 +578,10 @@ index 3d74f134f5..4b330a52b5 100644 void aarch64_cpu_register(const ARMCPUInfo 
*info) diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index a528ff9a3d..053f70e399 100644 +index c154a4dcf2..f29425b656 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -148,10 +148,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) } #endif /* !CONFIG_USER_ONLY */ @@ -536,7 +593,7 @@ index a528ff9a3d..053f70e399 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -505,7 +505,6 @@ static void cortex_a9_initfn(Object *obj) +@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -544,7 +601,7 @@ index a528ff9a3d..053f70e399 100644 #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -530,7 +529,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; @@ -552,7 +609,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -579,7 +577,6 @@ static void cortex_a7_initfn(Object *obj) +@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } @@ -560,7 +617,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_a15_initfn(Object *obj) { -@@ -628,7 +625,6 @@ static void cortex_a15_initfn(Object *obj) +@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ -568,7 +625,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1070,7 +1066,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ 
-1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -576,7 +633,7 @@ index a528ff9a3d..053f70e399 100644 #ifndef TARGET_AARCH64 /* -@@ -1138,7 +1133,6 @@ static void arm_max_initfn(Object *obj) +@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -584,7 +641,7 @@ index a528ff9a3d..053f70e399 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1154,9 +1148,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -594,7 +651,7 @@ index a528ff9a3d..053f70e399 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1187,7 +1179,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -602,36 +659,16 @@ index a528ff9a3d..053f70e399 100644 #ifndef TARGET_AARCH64 { .name = "max", .initfn = arm_max_initfn }, #endif -@@ -1215,3 +1206,4 @@ static void arm_tcg_cpu_register_types(void) +@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void) type_init(arm_tcg_cpu_register_types) #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ -diff --git a/target/arm/helper.c b/target/arm/helper.c -index d8c8223ec3..ad9d235773 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -8476,6 +8476,7 @@ void arm_cpu_list(void) - static void 
arm_cpu_add_definition(gpointer data, gpointer user_data) - { - ObjectClass *oc = data; -+ CPUClass *cc = CPU_CLASS(oc); - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; -@@ -8485,6 +8486,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); -+ info->deprecated = !!cc->deprecation_note; - - QAPI_LIST_PREPEND(*cpu_list, info); - } diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index 5a14527386..a3579fc303 100644 +index 1cb08138ad..834497dfec 100644 --- a/tests/qtest/arm-cpu-features.c +++ b/tests/qtest/arm-cpu-features.c -@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data) assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); /* Test expected feature presence/absence for some cpu types */ @@ -642,7 +679,7 @@ index 5a14527386..a3579fc303 100644 /* Enabling and disabling pmu should always work. 
*/ assert_has_feature_enabled(qts, "max", "pmu"); -@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data) assert_has_feature_enabled(qts, "cortex-a57", "pmu"); assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); @@ -650,7 +687,7 @@ index 5a14527386..a3579fc303 100644 assert_has_feature_enabled(qts, "a64fx", "pmu"); assert_has_feature_enabled(qts, "a64fx", "aarch64"); /* -@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data) "{ 'sve384': true }"); assert_error(qts, "a64fx", "cannot enable sve640", "{ 'sve640': true }"); @@ -658,7 +695,7 @@ index 5a14527386..a3579fc303 100644 sve_tests_default(qts, "max"); pauth_tests_default(qts, "max"); -@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) +@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) QDict *resp; char *error; @@ -671,5 +708,5 @@ index 5a14527386..a3579fc303 100644 assert_has_feature_enabled(qts, "host", "aarch64"); -- -2.31.1 +2.39.1 diff --git a/SOURCES/0008-Add-ppc64-machine-types.patch b/SOURCES/0008-Add-ppc64-machine-types.patch index a3cb0a3..ab78cae 100644 --- a/SOURCES/0008-Add-ppc64-machine-types.patch +++ b/SOURCES/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 2c523f1b6c9470e1cd517ba99e414cde02727e16 Mon Sep 17 00:00:00 2001 +From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -34,10 +34,10 @@ Merged patches (7.1.0 rc0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 66b414d2e9..499eb49253 100644 +index 4921198b9d..e24b3e22e3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1633,6 +1633,9 @@ static void 
spapr_machine_reset(MachineState *machine, ShutdownCause reason) +@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -47,7 +47,7 @@ index 66b414d2e9..499eb49253 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3347,6 +3350,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index 66b414d2e9..499eb49253 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3425,6 +3442,12 @@ static void spapr_instance_init(Object *obj) +@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index 66b414d2e9..499eb49253 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4682,6 +4705,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index 66b414d2e9..499eb49253 100644 } static const TypeInfo spapr_machine_info = { -@@ -4733,6 +4757,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-7.2 + * pseries-8.0 */ -@@ -4882,6 +4907,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ 
-4894,6 +4919,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,7 +105,7 @@ index 66b414d2e9..499eb49253 100644 /* * pseries-4.0 -@@ -4901,6 +4927,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -114,7 +114,7 @@ index 66b414d2e9..499eb49253 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5228,6 +5256,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 04a95669ab..d5f4cf5e03 100644 +index 5c8aabd444..04489d5808 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -154,6 +154,7 @@ struct SpaprMachineClass { +@@ -155,6 +155,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; @@ -386,7 +386,7 @@ index 04a95669ab..d5f4cf5e03 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -256,6 +257,9 @@ struct SpaprMachineState { +@@ -257,6 +258,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 81d4263a07..508fbed90b 100644 +index 557d736dab..6646ec1c27 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1467,6 +1467,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) 
+@@ -1482,6 +1482,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,10 +446,10 @@ index 81d4263a07..508fbed90b 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 7c25348b7b..83671c955f 100644 +index 78f6fc50cd..68d06c3f8f 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -89,6 +89,7 @@ static int cap_ppc_nested_kvm_hv; +@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv; static int cap_large_decr; static int cap_fwnmi; static int cap_rpt_invalidate; @@ -457,7 +457,7 @@ index 7c25348b7b..83671c955f 100644 static uint32_t debug_inst_opcode; -@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -465,7 +465,7 @@ index 7c25348b7b..83671c955f 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2570,6 +2572,16 @@ int kvmppc_has_cap_rpt_invalidate(void) +@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void) return cap_rpt_invalidate; } @@ -482,7 +482,7 @@ index 7c25348b7b..83671c955f 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2970,3 +2982,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void) void kvm_arch_accel_class_init(ObjectClass *oc) { } @@ -502,10 +502,10 @@ index 7c25348b7b..83671c955f 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index ee9325bf9a..20dbb95989 100644 +index 5fd9753953..b5ebfe2be0 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h -@@ -40,6 +40,7 @@ int 
kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); +@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); @@ -513,7 +513,7 @@ index ee9325bf9a..20dbb95989 100644 #ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); +@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void); int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); int kvmppc_has_cap_rpt_invalidate(void); int kvmppc_enable_hwrng(void); @@ -522,7 +522,7 @@ index ee9325bf9a..20dbb95989 100644 int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) +@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) return false; } @@ -540,5 +540,5 @@ index ee9325bf9a..20dbb95989 100644 { return -1; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0009-Add-s390x-machine-types.patch b/SOURCES/0009-Add-s390x-machine-types.patch index 5860009..07dfb57 100644 --- a/SOURCES/0009-Add-s390x-machine-types.patch +++ b/SOURCES/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 1973257ed781a93943f27f1518933e8c09c50f88 Mon Sep 17 00:00:00 2001 +From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -30,45 +30,72 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update +- a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type +- ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 --- - 
hw/s390x/s390-virtio-ccw.c | 108 +++++++++++++++++++++++++++++++ - target/s390x/cpu_models.c | 11 ++++ + hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++ + target/s390x/cpu_models.c | 11 +++ target/s390x/cpu_models.h | 2 + target/s390x/cpu_models_sysemu.c | 2 + - 4 files changed, 123 insertions(+) + 4 files changed, 158 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 2e64ffab45..8d5221fbb1 100644 +index 503f212a31..dcd3b966b0 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -823,6 +823,7 @@ bool css_migration_enabled(void) +@@ -826,6 +826,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_7_2_instance_options(MachineState *machine) + static void ccw_machine_8_0_instance_options(MachineState *machine) { } -@@ -1186,6 +1187,113 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + + ++static void ccw_machine_rhel920_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel920_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); ++ +static void ccw_machine_rhel900_instance_options(MachineState *machine) +{ + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; + ++ ccw_machine_rhel920_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); +} + +static void ccw_machine_rhel900_class_options(MachineClass *mc) +{ ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", 
"off", }, ++ }; ++ ++ ccw_machine_rhel920_class_options(mc); ++ ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); ++ s390mc->max_threads = S390_MAX_CPUS; +} -+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false); + +static void ccw_machine_rhel860_instance_options(MachineState *machine) +{ @@ -78,7 +105,14 @@ index 2e64ffab45..8d5221fbb1 100644 + +static void ccw_machine_rhel860_class_options(MachineClass *mc) +{ ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "on", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", }, ++ }; ++ + ccw_machine_rhel900_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = rhel_old_machine_deprecation; @@ -102,8 +136,14 @@ index 2e64ffab45..8d5221fbb1 100644 + +static void ccw_machine_rhel850_class_options(MachineClass *mc) +{ ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ + ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->smp_props.prefer_sockets = true; +} +DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); @@ -164,10 +204,10 @@ index 2e64ffab45..8d5221fbb1 100644 static void ccw_machine_register_types(void) { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index c3a4f80633..739770dc15 100644 +index 457b5cb10c..ff6b9463cb 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c -@@ 
-45,6 +45,9 @@ +@@ -46,6 +46,9 @@ * of a following release have been a superset of the previous release. With * generation 15 one base feature and one optional feature have been deprecated. */ @@ -177,7 +217,7 @@ index c3a4f80633..739770dc15 100644 static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -854,22 +857,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -222,7 +262,7 @@ index fb1adc8b21..d76745afa9 100644 /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index d086b1c39c..1b9cc66405 100644 +index 87a4480c05..28c1b0486c 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c @@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) @@ -242,5 +282,5 @@ index d086b1c39c..1b9cc66405 100644 if (cpu_list_data->model) { Object *obj; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0010-Add-x86_64-machine-types.patch b/SOURCES/0010-Add-x86_64-machine-types.patch index 181342a..9685338 100644 --- a/SOURCES/0010-Add-x86_64-machine-types.patch +++ b/SOURCES/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 0935624ccdddc286d6eeeb0c1b70d78983c21aa2 Mon Sep 17 00:00:00 2001 +From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -13,6 +13,9 @@ Rebase notes (6.1.0): Rebase notes (7.0.0): - Reset alias for all machine-types except latest one +Rebase notes (8.0.0-rc1): +- remove legacy_no_rng_seed usage (removed upstream) + Merged patches (6.1.0): - 
59c284ad3b x86: Add x86 rhel8.5 machine types - a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default @@ -39,24 +42,26 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- f33ca8aed4 x86: rhel 9.2.0 machine type --- - hw/i386/pc.c | 147 ++++++++++++++++++++++- - hw/i386/pc_piix.c | 86 +++++++++++++- - hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++++++++++- - hw/s390x/s390-virtio-ccw.c | 1 + + hw/i386/pc.c | 147 +++++++++++++++++++++- + hw/i386/pc_piix.c | 86 ++++++++++++- + hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 27 +++++ + include/hw/i386/pc.h | 27 ++++ target/i386/cpu.c | 21 ++++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 10 files changed, 521 insertions(+), 7 deletions(-) + 9 files changed, 538 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 546b703cb4..c7b1350e64 100644 +index 1489abf010..8abb1f872e 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -393,6 +393,149 @@ GlobalProperty pc_compat_1_4[] = { +@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -206,7 +211,7 @@ index 546b703cb4..c7b1350e64 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1907,6 +2050,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -214,7 +219,7 @@ index 546b703cb4..c7b1350e64 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1917,7 +2061,8 @@ static void pc_machine_class_init(ObjectClass 
*oc, void *data) +@@ -1954,7 +2098,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -225,10 +230,10 @@ index 546b703cb4..c7b1350e64 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0985ff67d2..173a1fd10b 100644 +index 14a794081e..3e330fd36f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -53,6 +53,7 @@ +@@ -54,6 +54,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -236,7 +241,7 @@ index 0985ff67d2..173a1fd10b 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -184,8 +185,8 @@ static void pc_init1(MachineState *machine, +@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -247,7 +252,7 @@ index 0985ff67d2..173a1fd10b 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -334,6 +335,7 @@ static void pc_init1(MachineState *machine, +@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -255,7 +260,7 @@ index 0985ff67d2..173a1fd10b 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -896,3 +898,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -304,7 +309,7 @@ index 0985ff67d2..173a1fd10b 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; -+ pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -340,10 +345,10 @@ index 0985ff67d2..173a1fd10b 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index ea582254e3..97c3630021 100644 +index dc0ba5f9e7..98601bb76f 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) +@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -354,7 +359,7 @@ index ea582254e3..97c3630021 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -352,6 +352,7 @@ static void pc_q35_init(MachineState *machine) +@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -362,7 +367,7 @@ index ea582254e3..97c3630021 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -666,3 +667,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, 
"pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -391,6 +396,23 @@ index ea582254e3..97c3630021 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel920(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel920_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.2.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, ++ pc_q35_machine_rhel920_options); ++ +static void pc_q35_init_rhel900(MachineState *machine) +{ + pc_q35_init(machine); @@ -399,11 +421,12 @@ index ea582254e3..97c3630021 100644 +static void pc_q35_machine_rhel900_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel920_options(m); + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; -+ pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -595,23 +618,11 @@ index ea582254e3..97c3630021 100644 + +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 8d5221fbb1..ba640e3d9e 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1213,6 +1213,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) - static void ccw_machine_rhel860_class_options(MachineClass *mc) - { - ccw_machine_rhel900_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); - - 
/* All RHEL machines for prior major releases are deprecated */ - mc->deprecation_reason = rhel_old_machine_deprecation; diff --git a/include/hw/boards.h b/include/hw/boards.h -index 2209d4e416..fd75f551b1 100644 +index c5a965d27f..5e7446ee40 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -266,6 +266,8 @@ struct MachineClass { +@@ -268,6 +268,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -621,12 +632,12 @@ index 2209d4e416..fd75f551b1 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 3754eaa97d..4266fe2fdb 100644 +index 908a275736..4376f64a47 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_5_len; - extern GlobalProperty pc_compat_1_4[]; - extern const size_t pc_compat_1_4_len; +@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len; + + int pc_machine_kvm_type(MachineState *machine, const char *vm_type); +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; @@ -659,10 +670,10 @@ index 3754eaa97d..4266fe2fdb 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 22b681ca37..f7c526cbe6 100644 +index 6576287e5b..0ef2bf1b93 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1832,9 +1832,13 @@ static const CPUCaches epyc_milan_cache_info = { +@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = { * PT in VMX operation */ @@ -676,7 +687,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -1855,6 +1859,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "phenom", @@ -684,7 +695,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, 
.vendor = CPUID_VENDOR_AMD, .family = 16, -@@ -1887,6 +1892,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "core2duo", @@ -692,7 +703,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1929,6 +1935,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm64", @@ -700,7 +711,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -1970,6 +1977,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "qemu32", @@ -708,7 +719,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 4, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1984,6 +1992,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm32", @@ -716,7 +727,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -2014,6 +2023,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "coreduo", @@ -724,7 +735,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2047,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "486", @@ -732,7 +743,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 4, -@@ -2059,6 +2070,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium", @@ -740,7 +751,7 @@ index 22b681ca37..f7c526cbe6 100644 
.level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 5, -@@ -2071,6 +2083,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium2", @@ -748,7 +759,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 2, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2083,6 +2096,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium3", @@ -756,7 +767,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 3, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2095,6 +2109,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "athlon", @@ -764,7 +775,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 2, .vendor = CPUID_VENDOR_AMD, .family = 6, -@@ -2110,6 +2125,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "n270", @@ -772,7 +783,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2135,6 +2151,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Conroe", @@ -780,7 +791,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2175,6 +2192,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Penryn", @@ -788,7 +799,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -3762,6 +3780,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G1", @@ -796,7 +807,7 @@ index 
22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3782,6 +3801,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G2", @@ -804,7 +815,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3804,6 +3824,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G3", @@ -825,10 +836,10 @@ index 7237378a7d..7b8a3d5af0 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index a213209379..81526a1575 100644 +index de531842f6..8d82304609 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3707,6 +3707,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3822,6 +3822,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -836,7 +847,7 @@ index a213209379..81526a1575 100644 kvm_msr_buf_reset(cpu); -@@ -4062,6 +4063,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -847,7 +858,7 @@ index a213209379..81526a1575 100644 case MSR_KVM_ASYNC_PF_INT: env->async_pf_int_msr = msrs[i].data; diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c -index bc7b7dfc39..96e6dee3a1 100644 +index 78f1cf8186..ac954c9b06 100644 --- a/tests/qtest/pvpanic-test.c +++ b/tests/qtest/pvpanic-test.c @@ -17,7 +17,7 @@ static void test_panic_nopause(void) @@ -870,5 +881,5 @@ index bc7b7dfc39..96e6dee3a1 100644 val = qtest_inb(qts, 0x505); g_assert_cmpuint(val, ==, 3); -- -2.31.1 +2.39.1 diff --git a/SOURCES/0011-Enable-make-check.patch b/SOURCES/0011-Enable-make-check.patch index d0be8e6..cc91302 100644 --- a/SOURCES/0011-Enable-make-check.patch +++ 
b/SOURCES/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From badfb1290c8eea8a2e1769b2392c7899d5077698 Mon Sep 17 00:00:00 2001 +From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -27,28 +27,37 @@ Rebase changes (7.0.0): Rebase changes (7.1.0 rc0): - Disable bcm2835-dma-test (added upstream) +Rebase changes (8.0.0-rc1): +- Removed chunks for disabling bios-table-test (protected upstream) + +Rebase change (8.0.0-rc2): +- Disable new qemu-iotests execution +- Revert change in tco qtest (blocking test run) + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again Merged patches (7.1.0 rc0): - 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57 --- - .distro/qemu-kvm.spec.template | 5 ++--- - tests/avocado/replay_kernel.py | 2 +- - tests/avocado/reverse_debugging.py | 2 +- - tests/avocado/tcg_plugins.py | 6 +++--- - tests/qtest/fuzz-e1000e-test.c | 2 +- - tests/qtest/fuzz-virtio-scsi-test.c | 2 +- - tests/qtest/intel-hda-test.c | 2 +- - tests/qtest/libqos/meson.build | 2 +- - tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 7 +------ - tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - tests/qtest/virtio-net-failover.c | 1 + - 12 files changed, 18 insertions(+), 19 deletions(-) + .distro/qemu-kvm.spec.template | 4 ++-- + tests/avocado/replay_kernel.py | 2 +- + tests/avocado/reverse_debugging.py | 2 +- + tests/avocado/tcg_plugins.py | 6 ++--- + tests/qemu-iotests/meson.build | 34 ++++++++++++++--------------- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/intel-hda-test.c | 2 +- + tests/qtest/libqos/meson.build | 2 +- + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 2 -- + tests/qtest/tco-test.c | 2 +- + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/virtio-net-failover.c | 1 + + 14 files changed, 35 insertions(+), 32 
deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py -index 00a26e4a0c..fe5ecf238a 100644 +index f13456e1ec..2fee270a42 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py @@ -147,7 +147,7 @@ def test_aarch64_virt(self): @@ -61,10 +70,10 @@ index 00a26e4a0c..fe5ecf238a 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index d2921e70c3..66d185ed42 100644 +index 680c314cfc..71eccb8fb6 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py -@@ -198,7 +198,7 @@ def test_aarch64_virt(self): +@@ -206,7 +206,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -104,6 +113,49 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build +index 9735071a29..32002335f4 100644 +--- a/tests/qemu-iotests/meson.build ++++ b/tests/qemu-iotests/meson.build +@@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats + check: true, + ) + +- foreach item: rc.stdout().strip().split() +- args = [qemu_iotests_check_cmd, +- '-tap', '-' + format, item, +- '--source-dir', meson.current_source_dir(), +- '--build-dir', meson.current_build_dir()] +- # Some individual tests take as long as 45 seconds +- # Bump the timeout to 3 minutes for some headroom +- # on slow machines to minimize spurious failures +- test('io-' + format + '-' + item, +- python, +- args: args, +- depends: qemu_iotests_binaries, +- env: qemu_iotests_env, +- protocol: 'tap', +- timeout: 180, +- suite: suites) +- endforeach ++# foreach item: rc.stdout().strip().split() ++# args = [qemu_iotests_check_cmd, ++# '-tap', '-' + format, item, ++# '--source-dir', 
meson.current_source_dir(), ++# '--build-dir', meson.current_build_dir()] ++# # Some individual tests take as long as 45 seconds ++# # Bump the timeout to 3 minutes for some headroom ++# # on slow machines to minimize spurious failures ++# test('io-' + format + '-' + item, ++# python, ++# args: args, ++# depends: qemu_iotests_binaries, ++# env: qemu_iotests_env, ++# protocol: 'tap', ++# timeout: 180, ++# suite: suites) ++# endforeach + endforeach diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c index 5052883fb6..b5286f4b12 100644 --- a/tests/qtest/fuzz-e1000e-test.c @@ -144,10 +196,10 @@ index d4a8db6fd6..1a796ec15a 100644 qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index 32f028872c..1e78a1a055 100644 +index cc209a8de5..42a7c529c9 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build -@@ -43,7 +43,7 @@ libqos_srcs = files( +@@ -44,7 +44,7 @@ libqos_srcs = files( 'virtio-rng.c', 'virtio-scsi.c', 'virtio-serial.c', @@ -170,18 +222,10 @@ index 8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index c07a5b1a5f..9df3f9f8b9 100644 +index 85ea4e8d99..893afc8eeb 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -82,7 +82,6 @@ qtests_i386 = \ - config_all_devices.has_key('CONFIG_Q35') and \ - config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ - slirp.found() ? ['virtio-net-failover'] : []) + \ -- (unpack_edk2_blobs ? 
['bios-tables-test'] : []) + \ - qtests_pci + \ - qtests_cxl + \ - ['fdc-test', -@@ -96,7 +95,6 @@ qtests_i386 = \ +@@ -94,7 +94,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -189,24 +233,7 @@ index c07a5b1a5f..9df3f9f8b9 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -209,15 +207,13 @@ qtests_arm = \ - - # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional - qtests_aarch64 = \ -- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ - (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \ - (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ - (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \ - ['arm-cpu-features', - 'numa-test', - 'boot-serial-test', -- 'migration-test', -- 'bcm2835-dma-test'] -+ 'migration-test'] - - qtests_s390x = \ - (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ -@@ -225,7 +221,6 @@ qtests_s390x = \ +@@ -223,7 +222,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', @@ -214,6 +241,19 @@ index c07a5b1a5f..9df3f9f8b9 100644 'virtio-ccw-test', 'cpu-plug-test', 'migration-test'] +diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c +index 0547d41173..3756ce82d8 100644 +--- a/tests/qtest/tco-test.c ++++ b/tests/qtest/tco-test.c +@@ -60,7 +60,7 @@ static void test_init(TestData *d) + QTestState *qs; + + qs = qtest_initf("-machine q35 %s %s", +- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "", ++ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false", + !d->args ? 
"" : d->args); + qtest_irq_intercept_in(qs, "ioapic"); + diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c index 10ef9d2a91..3855873050 100644 --- a/tests/qtest/usb-hcd-xhci-test.c @@ -257,5 +297,5 @@ index 4a809590bf..1bf3fa641c 100644 "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " -- -2.31.1 +2.39.1 diff --git a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 477a75d..430959b 100644 --- a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 0804844e4755377be6d2ebad578794ad9f4f3f31 Mon Sep 17 00:00:00 2001 +From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,7 +32,7 @@ Signed-off-by: Bandan Das 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 939dcc3d4a..acbc6673ce 100644 +index ec9a854361..a779053be3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -48,6 +48,9 @@ @@ -77,7 +77,7 @@ index 939dcc3d4a..acbc6673ce 100644 if (!vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3293,6 +3317,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -88,7 +88,7 @@ index 939dcc3d4a..acbc6673ce 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 7c236a52f4..7b7d036a8f 100644 +index 177abcc8fb..45235d38ba 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -140,6 +140,7 @@ struct 
VFIOPCIDevice { @@ -100,5 +100,5 @@ index 7c236a52f4..7b7d036a8f 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0013-Add-support-statement-to-help-output.patch b/SOURCES/0013-Add-support-statement-to-help-output.patch index 022f194..25db0b8 100644 --- a/SOURCES/0013-Add-support-statement-to-help-output.patch +++ b/SOURCES/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 283a0e258dc2f3b83c58e6f948bafe430cd2c1d5 Mon Sep 17 00:00:00 2001 +From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,7 +21,7 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 5115221efe..17188df528 100644 +index ea20b23e4c..ad4173138d 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -834,9 +834,17 @@ static void version(void) @@ -51,5 +51,5 @@ index 5115221efe..17188df528 100644 } -- -2.31.1 +2.39.1 diff --git a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index e39555b..b97c844 100644 --- a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From d8ded821aa698b3b03bd9089fbd6c2b33da87b9e Mon Sep 17 00:00:00 2001 +From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. 
|I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 7f99d15b23..ea02ca3a45 100644 +index 59bdf67a2c..52b49f1f6a 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3300,11 +3300,11 @@ SRST +@@ -3296,11 +3296,11 @@ SRST :: @@ -57,5 +57,5 @@ index 7f99d15b23..ea02ca3a45 100644 ``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. -- -2.31.1 +2.39.1 diff --git a/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch deleted file mode 100644 index 2bedb0b..0000000 --- a/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 9c6acadb444c9300d7c18b6939ce4f96484aeacc Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Wed, 6 Feb 2019 03:58:56 +0000 -Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts - -RH-Author: David Gibson -Message-id: <20190206035856.19058-1-dgibson@redhat.com> -Patchwork-id: 84246 -O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts -Bugzilla: 1653590 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Serhii Popovych -RH-Acked-by: Thomas Huth - -Most current POWER guests require 64kiB page support, so that's the default -for the cap-hpt-max-pagesize option in qemu which limits available guest -page sizes. We warn if the value is set smaller than that, but don't -outright fail upstream, because we need to allow for the possibility of -guest (and/or host) kernels configured for 4kiB page sizes. - -Downstream, however, we simply don't support 4kiB pagesize configured -kernels in guest or host, so we can have qemu simply error out in this -situation. - -Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified - it failed immediately with a qemu error - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr_caps.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index b4283055c1..59b88aadff 100644 ---- a/hw/ppc/spapr_caps.c -+++ b/hw/ppc/spapr_caps.c -@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, - static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, - uint8_t val, Error **errp) - { -+#if 0 /* disabled for RHEL */ - if (val < 12) { - error_setg(errp, "Require at least 4kiB hpt-max-page-size"); - return; - } else if (val < 16) { - warn_report("Many guests require at least 64kiB hpt-max-page-size"); - } -+#else /* Only page sizes >=64kiB supported for RHEL */ -+ if (val < 16) { -+ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); -+ return; -+ } -+#endif - - spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); - } --- -2.31.1 - diff --git a/SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 92% rename from SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index cee5476..1e2f8e1 100644 --- a/SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From 02fde2a0cbd679ebd4104fe5522572c31ec23abd Mon Sep 17 00:00:00 2001 +From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 4d6666d3ff..d2ba263e9d 100644 +index 30fd53fa64..22084730f9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1336,6 +1336,12 @@ static int coroutine_fn 
qcow2_do_open(BlockDriverState *bs, QDict *options, +@@ -1337,6 +1337,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOTSUP; goto fail; } @@ -61,7 +61,7 @@ index 4d6666d3ff..d2ba263e9d 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index cc9f1a5891..6a13757177 100644 +index 6b32c7fbfa..6ddda2ee64 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -83,6 +83,7 @@ _filter_qemu() @@ -73,5 +73,5 @@ index cc9f1a5891..6a13757177 100644 } -- -2.31.1 +2.39.1 diff --git a/SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch b/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch similarity index 81% rename from SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch rename to SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch index d039212..bb9455a 100644 --- a/SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +++ b/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch @@ -1,7 +1,7 @@ -From 48f45171b89b8ed24f2b2484d63b00ea7818b5c3 Mon Sep 17 00:00:00 2001 +From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001 From: Kfir Manor Date: Sun, 22 Jan 2023 17:33:07 +0200 -Subject: [PATCH 9/9] qga/linux: add usb support to guest-get-fsinfo +Subject: qga/linux: add usb support to guest-get-fsinfo RH-Author: Kostiantyn Kostiuk RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo @@ -16,15 +16,19 @@ Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.co Signed-off-by: Kfir Manor Reviewed-by: Konstantin Kostiuk Signed-off-by: Konstantin Kostiuk + +Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +Patch-id: 72 +Patch-present-in-specfile: True --- qga/commands-posix.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 
32493d6383..f1b2b87c13 100644 +index 079689d79a..97754930c1 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c -@@ -877,7 +877,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, +@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, g_str_equal(driver, "sym53c8xx") || g_str_equal(driver, "virtio-pci") || g_str_equal(driver, "ahci") || @@ -35,7 +39,7 @@ index 32493d6383..f1b2b87c13 100644 break; } -@@ -974,6 +976,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, +@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, } } else if (strcmp(driver, "nvme") == 0) { disk->bus_type = GUEST_DISK_BUS_TYPE_NVME; @@ -45,5 +49,5 @@ index 32493d6383..f1b2b87c13 100644 g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath); goto cleanup; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch b/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch new file mode 100644 index 0000000..ce0ba5c --- /dev/null +++ b/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch @@ -0,0 +1,110 @@ +From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 15 Feb 2023 02:03:17 -0500 +Subject: Add RHEL 9.2.0 compat structure + +Adding compatibility bits necessary to keep 9.2.0 machine +types same after rebase to 8.0. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase notes (8.0.0 rc4): +- Added migration.x-preempt-pre-7-2 compat) +--- + hw/arm/virt.c | 1 + + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 3 +++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 6 files changed, 20 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 1ae1654be5..9be53e9355 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init); + static void rhel920_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5aa567fad3..0e0120b7f2 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_2[] = { ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "e1000e", "migrate-timadj", "off" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "virtio-mem", "x-early-migration", "false" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "migration", "x-preempt-pre-7-2", "true" }, ++}; ++const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); ++ + /* + * Mostly the same as hw_compat_7_0 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 3e330fd36f..90fb6e2e03 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; + pcmc->enforce_amd_1tb_hole = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); + compat_props_add(m->compat_props, 
hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 98601bb76f..8945b69175 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) + m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.2.0"; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); + } + + DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index dcd3b966b0..6a0b93c63d 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine) + + static void ccw_machine_rhel920_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); + } + DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 5e7446ee40..5f08bd7550 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_2[]; ++extern const size_t hw_compat_rhel_9_2_len; ++ + extern GlobalProperty hw_compat_rhel_9_1[]; + extern const size_t hw_compat_rhel_9_1_len; + +-- +2.39.1 + diff --git a/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch b/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch deleted file mode 100644 index 001880b..0000000 --- a/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 9 
Nov 2022 07:08:32 -0500 -Subject: Addd 7.2 compat bits for RHEL 9.1 machine type - -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 9edec1ca05..3d851d34da 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "arm-gicv3-common", "force-8-bit-prio", "on" }, - /* hw_compat_rhel_9_1 from hw_compat_7_0 */ - { "nvme-ns", "eui64-default", "on"}, -+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */ -+ { "virtio-device", "queue_reset", "false" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch b/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch new file mode 100644 index 0000000..81993e9 --- /dev/null +++ b/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch @@ -0,0 +1,76 @@ +From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 Mar 2023 15:14:03 +0200 +Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU + 8.0.0 update + +Add pc_rhel_9_2_compat based on upstream pc_compat_7_2. 
+ +Signed-off-by: Thomas Huth +--- + hw/i386/pc.c | 6 ++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + include/hw/i386/pc.h | 3 +++ + 4 files changed, 13 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 8abb1f872e..f216922cee 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_2_compat[] = { ++ /* pc_rhel_9_2_compat from pc_compat_7_2 */ ++ { "ICH9-LPC", "noreboot", "true" }, ++}; ++const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); ++ + GlobalProperty pc_rhel_9_0_compat[] = { + /* pc_rhel_9_0_compat from pc_compat_6_2 */ + { "virtio-mem", "unplugged-inaccessible", "off" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 90fb6e2e03..fc704d783f 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_2, + hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 8945b69175..e97655616a 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) + + compat_props_add(m->compat_props, hw_compat_rhel_9_2, + hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 4376f64a47..d218ad1628 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -296,6 +296,9 @@ int 
pc_machine_kvm_type(MachineState *machine, const char *vm_type); + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_2_compat[]; ++extern const size_t pc_rhel_9_2_compat_len; ++ + extern GlobalProperty pc_rhel_9_0_compat[]; + extern const size_t pc_rhel_9_0_compat_len; + +-- +2.39.1 + diff --git a/SOURCES/0019-Disable-unwanted-new-devices.patch b/SOURCES/0019-Disable-unwanted-new-devices.patch new file mode 100644 index 0000000..f656ca9 --- /dev/null +++ b/SOURCES/0019-Disable-unwanted-new-devices.patch @@ -0,0 +1,83 @@ +From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 17 Apr 2023 01:24:18 -0400 +Subject: Disable unwanted new devices + +QEMU 8.0 adds two new device we do not want to support that can't +be disabled using configure switch. + +1) ide-cf - virtual CompactFlash card + +2) i2c-echo - testing echo device + +Use manual disabling of the device by changing code (1) and meson configs (2). 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/ide/qdev.c | 9 +++++++++ + hw/misc/meson.build | 3 ++- + 2 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c +index 1b3b4da01d..454bfa5783 100644 +--- a/hw/ide/qdev.c ++++ b/hw/ide/qdev.c +@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) + ide_dev_initfn(dev, IDE_CD, errp); + } + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static void ide_cf_realize(IDEDevice *dev, Error **errp) + { + ide_dev_initfn(dev, IDE_CFATA, errp); + } ++#endif + + #define DEFINE_IDE_DEV_PROPERTIES() \ + DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ +@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { + .class_init = ide_cd_class_init, + }; + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static Property ide_cf_properties[] = { + DEFINE_IDE_DEV_PROPERTIES(), + DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), +@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { + .instance_size = sizeof(IDEDrive), + .class_init = ide_cf_class_init, + }; ++#endif + + static void ide_device_class_init(ObjectClass *klass, void *data) + { +@@ -396,7 +402,10 @@ static void ide_register_types(void) + type_register_static(&ide_bus_info); + type_register_static(&ide_hd_info); + type_register_static(&ide_cd_info); ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + type_register_static(&ide_cf_info); ++#endif + type_register_static(&ide_device_type_info); + } + +diff --git a/hw/misc/meson.build b/hw/misc/meson.build +index a40245ad44..9cc5a61ed7 100644 +--- a/hw/misc/meson.build ++++ b/hw/misc/meson.build +@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c')) + + softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c')) + +-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) ++# Disabled for Red Hat Enterprise Linux ++# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) + + 
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c')) + +-- +2.39.1 + diff --git a/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch b/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch deleted file mode 100644 index 2642b30..0000000 --- a/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 17 Nov 2022 16:47:16 +0100 -Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585 -Upstream Status: n/a (rhel-only) - -Add the compatibility handling for the rebase from QEMU 7.1 to 7.2, -i.e. the settings from ccw_machine_7_1_class_options() and -ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type -(earlier settings have been added by previous rebases already). - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index ba640e3d9e..97e868ada0 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine) - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; - - s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); -+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); - } - - static void ccw_machine_rhel900_class_options(MachineClass *mc) - { -+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, -+ }; -+ -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - 
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); -+ s390mc->max_threads = S390_MAX_CPUS; - } - DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); - --- -2.31.1 - diff --git a/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch b/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch deleted file mode 100644 index cb69b93..0000000 --- a/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch +++ /dev/null @@ -1,43 +0,0 @@ -From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Wed, 23 Nov 2022 14:15:37 +0100 -Subject: redhat: aarch64: add rhel9.2.0 virt machine type - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982 -Upstream: RHEL only - -Signed-off-by: Cornelia Huck ---- - hw/arm/virt.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index dfcab40a73..0a94f31dd1 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - -+static void rhel920_virt_options(MachineClass *mc) -+{ -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) -+ - static void rhel900_virt_options(MachineClass *mc) - { - VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); - -+ rhel920_virt_options(mc); -+ - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; - } --DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) -+DEFINE_RHEL_MACHINE(9, 0, 0) --- -2.31.1 - diff --git a/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch b/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch deleted file mode 100644 index 144bd92..0000000 --- a/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch +++ 
/dev/null @@ -1,62 +0,0 @@ -From a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 17 Nov 2022 17:03:24 +0100 -Subject: redhat: Add new rhel-9.2.0 s390x machine type - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473 -Upstream Status: n/a (rhel-only) - -RHEL 9.2 will be an EUS release - we want to have a new machine -type here to make sure that we have a spot where we can wire up -fixes later. - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 97e868ada0..aa142a1a4e 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); - #endif - - -+static void ccw_machine_rhel920_instance_options(MachineState *machine) -+{ -+} -+ -+static void ccw_machine_rhel920_class_options(MachineClass *mc) -+{ -+} -+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); -+ - static void ccw_machine_rhel900_instance_options(MachineState *machine) - { - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; - -+ ccw_machine_rhel920_instance_options(machine); -+ - s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); - s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); - } -@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc) - { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, - }; - -+ ccw_machine_rhel920_class_options(mc); -+ - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); - s390mc->max_threads = S390_MAX_CPUS; - } --DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); -+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false); - - static void 
ccw_machine_rhel860_instance_options(MachineState *machine) - { --- -2.31.1 - diff --git a/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch b/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch deleted file mode 100644 index 8502b91..0000000 --- a/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch +++ /dev/null @@ -1,75 +0,0 @@ -From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 17 Nov 2022 12:36:30 +0000 -Subject: x86: rhel 9.2.0 machine type - -Add a 9.2.0 x86 machine type, and fix up the compatibility -for 9.0.0 and older. - -pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's -nothing to do there. - -Signed-off-by: Dr. David Alan Gilbert ---- - hw/i386/pc_piix.c | 1 + - hw/i386/pc_q35.c | 21 ++++++++++++++++++++- - 2 files changed, 21 insertions(+), 1 deletion(-) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 173a1fd10b..fc06877344 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m) - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; - pcmc->legacy_no_rng_seed = true; -+ pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 97c3630021..52cfe3bf45 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel920(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel920_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL"; -+ 
pcmc->smbios_stream_version = "9.2.0"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, -+ pc_q35_machine_rhel920_options); -+ - static void pc_q35_init_rhel900(MachineState *machine) - { - pc_q35_init(machine); -@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine) - static void pc_q35_machine_rhel900_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel_options(m); -+ pc_q35_machine_rhel920_options(m); - m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.0.0"; - pcmc->legacy_no_rng_seed = true; -+ pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, --- -2.31.1 - diff --git a/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch b/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch deleted file mode 100644 index b7aba7e..0000000 --- a/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:17:23 -0500 -Subject: [PATCH 30/31] KVM: keep track of running ioctls - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit a27dd2de68f37ba96fe164a42121daa5f0750afc -Author: Emanuele Giuseppe Esposito -Date: Fri Nov 11 10:47:57 2022 -0500 - - KVM: keep track of running ioctls - - Using the new accel-blocker API, mark where ioctls are being called - in KVM. 
Next, we will implement the critical section that will take - care of performing memslots modifications atomically, therefore - preventing any new ioctl from running and allowing the running ones - to finish. - - Signed-off-by: David Hildenbrand - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20221111154758.1372674-3-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/kvm/kvm-all.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index f99b0becd8..ff660fd469 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms) - assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size()); - - s->sigmask_len = 8; -+ accel_blocker_init(); - - #ifdef KVM_CAP_SET_GUEST_DEBUG - QTAILQ_INIT(&s->kvm_sw_breakpoints); -@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) - va_end(ap); - - trace_kvm_vm_ioctl(type, arg); -+ accel_ioctl_begin(); - ret = ioctl(s->vmfd, type, arg); -+ accel_ioctl_end(); - if (ret == -1) { - ret = -errno; - } -@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) - va_end(ap); - - trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg); -+ accel_cpu_ioctl_begin(cpu); - ret = ioctl(cpu->kvm_fd, type, arg); -+ accel_cpu_ioctl_end(cpu); - if (ret == -1) { - ret = -errno; - } -@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...) 
- va_end(ap); - - trace_kvm_device_ioctl(fd, type, arg); -+ accel_ioctl_begin(); - ret = ioctl(fd, type, arg); -+ accel_ioctl_end(); - if (ret == -1) { - ret = -errno; - } --- -2.31.1 - diff --git a/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch b/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch deleted file mode 100644 index 752aa08..0000000 --- a/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 19 Jan 2023 18:24:24 +0100 -Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in - vhost_user_read()" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 146: Fix vhost-user with dpdk -RH-Bugzilla: 2155173 -RH-Acked-by: Cindy Lu -RH-Acked-by: Greg Kurz (RH) -RH-Acked-by: Eugenio Pérez -RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos) - -This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692. - -The nested event loop is broken by design. It's only user was removed. -Drop the code as well so that nobody ever tries to use it again. - -I had to fix a couple of trivial conflicts around return values because -of 025faa872bcf ("vhost-user: stick to -errno error return convention"). - -Signed-off-by: Greg Kurz -Message-Id: <20230119172424.478268-3-groug@kaod.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Maxime Coquelin -(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-user.c | 65 ++++-------------------------------------- - 1 file changed, 5 insertions(+), 60 deletions(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 0ac00eb901..7cb49c50f9 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) - return 0; - } - --struct vhost_user_read_cb_data { -- struct vhost_dev *dev; -- VhostUserMsg *msg; -- GMainLoop *loop; -- int ret; --}; -- --static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, -- gpointer opaque) -+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - { -- struct vhost_user_read_cb_data *data = opaque; -- struct vhost_dev *dev = data->dev; -- VhostUserMsg *msg = data->msg; - struct vhost_user *u = dev->opaque; - CharBackend *chr = u->user->chr; - uint8_t *p = (uint8_t *) msg; -@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - - r = vhost_user_read_header(dev, msg); - if (r < 0) { -- data->ret = r; -- goto end; -+ return r; - } - - /* validate message size is sane */ -@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - error_report("Failed to read msg header." - " Size %d exceeds the maximum %zu.", msg->hdr.size, - VHOST_USER_PAYLOAD_SIZE); -- data->ret = -EPROTO; -- goto end; -+ return -EPROTO; - } - - if (msg->hdr.size) { -@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - int saved_errno = errno; - error_report("Failed to read msg payload." - " Read %d instead of %d.", r, msg->hdr.size); -- data->ret = r < 0 ? -saved_errno : -EIO; -- goto end; -+ return r < 0 ? 
-saved_errno : -EIO; - } - } - --end: -- g_main_loop_quit(data->loop); -- return G_SOURCE_REMOVE; --} -- --static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) --{ -- struct vhost_user *u = dev->opaque; -- CharBackend *chr = u->user->chr; -- GMainContext *prev_ctxt = chr->chr->gcontext; -- GMainContext *ctxt = g_main_context_new(); -- GMainLoop *loop = g_main_loop_new(ctxt, FALSE); -- struct vhost_user_read_cb_data data = { -- .dev = dev, -- .loop = loop, -- .msg = msg, -- .ret = 0 -- }; -- -- /* -- * We want to be able to monitor the slave channel fd while waiting -- * for chr I/O. This requires an event loop, but we can't nest the -- * one to which chr is currently attached : its fd handlers might not -- * be prepared for re-entrancy. So we create a new one and switch chr -- * to use it. -- */ -- qemu_chr_be_update_read_handlers(chr->chr, ctxt); -- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); -- -- g_main_loop_run(loop); -- -- /* -- * Restore the previous event loop context. This also destroys/recreates -- * event sources : this guarantees that all pending events in the original -- * context that have been processed by the nested loop are purged. 
-- */ -- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); -- -- g_main_loop_unref(loop); -- g_main_context_unref(ctxt); -- -- return data.ret; -+ return 0; - } - - static int process_message_reply(struct vhost_dev *dev, --- -2.31.1 - diff --git a/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch b/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch deleted file mode 100644 index 8e7b906..0000000 --- a/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 19 Jan 2023 18:24:23 +0100 -Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in - vhost_user_read()" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 146: Fix vhost-user with dpdk -RH-Bugzilla: 2155173 -RH-Acked-by: Cindy Lu -RH-Acked-by: Greg Kurz (RH) -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos) - -This reverts commit db8a3772e300c1a656331a92da0785d81667dc81. - -Motivation : this is breaking vhost-user with DPDK as reported in [0]. - -Received unexpected msg type. Expected 22 received 40 -Fail to update device iotlb -Received unexpected msg type. Expected 40 received 22 -Received unexpected msg type. Expected 22 received 11 -Fail to update device iotlb -Received unexpected msg type. Expected 11 received 22 -vhost VQ 1 ring restore failed: -71: Protocol error (71) -Received unexpected msg type. Expected 22 received 11 -Fail to update device iotlb -Received unexpected msg type. 
Expected 11 received 22 -vhost VQ 0 ring restore failed: -71: Protocol error (71) -unable to start vhost net: 71: falling back on userspace virtio - -The failing sequence that leads to the first error is : -- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master - socket -- QEMU starts a nested event loop in order to wait for the - VHOST_USER_GET_STATUS response and to be able to process messages from - the slave channel -- DPDK sends a couple of legitimate IOTLB miss messages on the slave - channel -- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22) - updates on the master socket -- QEMU assumes to receive a response for the latest VHOST_USER_IOTLB_MSG - but it gets the response for the VHOST_USER_GET_STATUS instead - -The subsequent errors have the same root cause : the nested event loop -breaks the order by design. It lures QEMU to expect responses to the -latest message sent on the master socket to arrive first. - -Since this was only needed for DAX enablement which is still not merged -upstream, just drop the code for now. A working solution will have to -be merged later on. Likely protect the master socket with a mutex -and service the slave channel with a separate thread, as discussed with -Maxime in the mail thread below. - -[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/ - -Reported-by: Yanghang Liu -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173 -Signed-off-by: Greg Kurz -Message-Id: <20230119172424.478268-2-groug@kaod.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Stefan Hajnoczi -Acked-by: Maxime Coquelin -(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-user.c | 35 +++-------------------------------- - 1 file changed, 3 insertions(+), 32 deletions(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 8f635844af..0ac00eb901 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -356,35 +356,6 @@ end: - return G_SOURCE_REMOVE; - } - --static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, -- gpointer opaque); -- --/* -- * This updates the read handler to use a new event loop context. -- * Event sources are removed from the previous context : this ensures -- * that events detected in the previous context are purged. They will -- * be re-detected and processed in the new context. -- */ --static void slave_update_read_handler(struct vhost_dev *dev, -- GMainContext *ctxt) --{ -- struct vhost_user *u = dev->opaque; -- -- if (!u->slave_ioc) { -- return; -- } -- -- if (u->slave_src) { -- g_source_destroy(u->slave_src); -- g_source_unref(u->slave_src); -- } -- -- u->slave_src = qio_channel_add_watch_source(u->slave_ioc, -- G_IO_IN | G_IO_HUP, -- slave_read, dev, NULL, -- ctxt); --} -- - static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - { - struct vhost_user *u = dev->opaque; -@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - * be prepared for re-entrancy. So we create a new one and switch chr - * to use it. - */ -- slave_update_read_handler(dev, ctxt); - qemu_chr_be_update_read_handlers(chr->chr, ctxt); - qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); - -@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - * context that have been processed by the nested loop are purged. 
- */ - qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); -- slave_update_read_handler(dev, NULL); - - g_main_loop_unref(loop); - g_main_context_unref(ctxt); -@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) - return -ECONNREFUSED; - } - u->slave_ioc = ioc; -- slave_update_read_handler(dev, NULL); -+ u->slave_src = qio_channel_add_watch_source(u->slave_ioc, -+ G_IO_IN | G_IO_HUP, -+ slave_read, dev, NULL, NULL); - - if (reply_supported) { - msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; --- -2.31.1 - diff --git a/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch b/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch deleted file mode 100644 index 29a8ac5..0000000 --- a/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch +++ /dev/null @@ -1,348 +0,0 @@ -From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:16:41 -0500 -Subject: [PATCH 29/31] accel: introduce accelerator blocker API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1 -Author: Emanuele Giuseppe Esposito -Date: Fri Nov 11 10:47:56 2022 -0500 - - accel: introduce accelerator blocker API - - This API allows the accelerators to prevent vcpus from issuing - new ioctls while execting a critical section marked with the - accel_ioctl_inhibit_begin/end functions. - - Note that all functions submitting ioctls must mark where the - ioctl is being called with accel_{cpu_}ioctl_begin/end(). 
- - This API requires the caller to always hold the BQL. - API documentation is in sysemu/accel-blocker.h - - Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt - (to minimize cache line bouncing) to keep avoid that new ioctls - run when the critical section starts, and a QemuEvent to wait - that all running ioctls finish. - - Signed-off-by: Emanuele Giuseppe Esposito - Reviewed-by: Philippe Mathieu-Daudé - Message-Id: <20221111154758.1372674-2-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Conflicts: - util/meson.build: "interval-tree.c" does not exist - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++ - accel/meson.build | 2 +- - hw/core/cpu-common.c | 2 + - include/hw/core/cpu.h | 3 + - include/sysemu/accel-blocker.h | 56 ++++++++++++ - util/meson.build | 2 +- - 6 files changed, 217 insertions(+), 2 deletions(-) - create mode 100644 accel/accel-blocker.c - create mode 100644 include/sysemu/accel-blocker.h - -diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c -new file mode 100644 -index 0000000000..1e7f423462 ---- /dev/null -+++ b/accel/accel-blocker.c -@@ -0,0 +1,154 @@ -+/* -+ * Lock to inhibit accelerator ioctls -+ * -+ * Copyright (c) 2022 Red Hat Inc. -+ * -+ * Author: Emanuele Giuseppe Esposito -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/thread.h" -+#include "qemu/main-loop.h" -+#include "hw/core/cpu.h" -+#include "sysemu/accel-blocker.h" -+ -+static QemuLockCnt accel_in_ioctl_lock; -+static QemuEvent accel_in_ioctl_event; -+ -+void accel_blocker_init(void) -+{ -+ qemu_lockcnt_init(&accel_in_ioctl_lock); -+ qemu_event_init(&accel_in_ioctl_event, false); -+} -+ -+void accel_ioctl_begin(void) -+{ -+ if (likely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ -+ qemu_lockcnt_inc(&accel_in_ioctl_lock); -+} -+ -+void accel_ioctl_end(void) -+{ -+ if (likely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ qemu_lockcnt_dec(&accel_in_ioctl_lock); -+ /* change event to SET. If event was BUSY, wake up all waiters */ -+ qemu_event_set(&accel_in_ioctl_event); -+} -+ -+void accel_cpu_ioctl_begin(CPUState *cpu) -+{ -+ if (unlikely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ -+ qemu_lockcnt_inc(&cpu->in_ioctl_lock); -+} -+ -+void accel_cpu_ioctl_end(CPUState *cpu) -+{ -+ if (unlikely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ qemu_lockcnt_dec(&cpu->in_ioctl_lock); -+ /* change event to SET. 
If event was BUSY, wake up all waiters */ -+ qemu_event_set(&accel_in_ioctl_event); -+} -+ -+static bool accel_has_to_wait(void) -+{ -+ CPUState *cpu; -+ bool needs_to_wait = false; -+ -+ CPU_FOREACH(cpu) { -+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) { -+ /* exit the ioctl, if vcpu is running it */ -+ qemu_cpu_kick(cpu); -+ needs_to_wait = true; -+ } -+ } -+ -+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock); -+} -+ -+void accel_ioctl_inhibit_begin(void) -+{ -+ CPUState *cpu; -+ -+ /* -+ * We allow to inhibit only when holding the BQL, so we can identify -+ * when an inhibitor wants to issue an ioctl easily. -+ */ -+ g_assert(qemu_mutex_iothread_locked()); -+ -+ /* Block further invocations of the ioctls outside the BQL. */ -+ CPU_FOREACH(cpu) { -+ qemu_lockcnt_lock(&cpu->in_ioctl_lock); -+ } -+ qemu_lockcnt_lock(&accel_in_ioctl_lock); -+ -+ /* Keep waiting until there are running ioctls */ -+ while (true) { -+ -+ /* Reset event to FREE. */ -+ qemu_event_reset(&accel_in_ioctl_event); -+ -+ if (accel_has_to_wait()) { -+ /* -+ * If event is still FREE, and there are ioctls still in progress, -+ * wait. -+ * -+ * If an ioctl finishes before qemu_event_wait(), it will change -+ * the event state to SET. This will prevent qemu_event_wait() from -+ * blocking, but it's not a problem because if other ioctls are -+ * still running the loop will iterate once more and reset the event -+ * status to FREE so that it can wait properly. -+ * -+ * If an ioctls finishes while qemu_event_wait() is blocking, then -+ * it will be waken up, but also here the while loop makes sure -+ * to re-enter the wait if there are other running ioctls. 
-+ */ -+ qemu_event_wait(&accel_in_ioctl_event); -+ } else { -+ /* No ioctl is running */ -+ return; -+ } -+ } -+} -+ -+void accel_ioctl_inhibit_end(void) -+{ -+ CPUState *cpu; -+ -+ qemu_lockcnt_unlock(&accel_in_ioctl_lock); -+ CPU_FOREACH(cpu) { -+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock); -+ } -+} -+ -diff --git a/accel/meson.build b/accel/meson.build -index 259c35c4c8..061332610f 100644 ---- a/accel/meson.build -+++ b/accel/meson.build -@@ -1,4 +1,4 @@ --specific_ss.add(files('accel-common.c')) -+specific_ss.add(files('accel-common.c', 'accel-blocker.c')) - softmmu_ss.add(files('accel-softmmu.c')) - user_ss.add(files('accel-user.c')) - -diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c -index f9fdd46b9d..8d6a4b1b65 100644 ---- a/hw/core/cpu-common.c -+++ b/hw/core/cpu-common.c -@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj) - cpu->nr_threads = 1; - - qemu_mutex_init(&cpu->work_mutex); -+ qemu_lockcnt_init(&cpu->in_ioctl_lock); - QSIMPLEQ_INIT(&cpu->work_list); - QTAILQ_INIT(&cpu->breakpoints); - QTAILQ_INIT(&cpu->watchpoints); -@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj) - { - CPUState *cpu = CPU(obj); - -+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock); - qemu_mutex_destroy(&cpu->work_mutex); - } - -diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 8830546121..2417597236 100644 ---- a/include/hw/core/cpu.h -+++ b/include/hw/core/cpu.h -@@ -398,6 +398,9 @@ struct CPUState { - uint32_t kvm_fetch_index; - uint64_t dirty_pages; - -+ /* Use by accel-block: CPU is executing an ioctl() */ -+ QemuLockCnt in_ioctl_lock; -+ - /* Used for events with 'vcpu' and *without* the 'disabled' properties */ - DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); - DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); -diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h -new file mode 100644 -index 0000000000..72020529ef ---- /dev/null -+++ b/include/sysemu/accel-blocker.h -@@ -0,0 
+1,56 @@ -+/* -+ * Accelerator blocking API, to prevent new ioctls from starting and wait the -+ * running ones finish. -+ * This mechanism differs from pause/resume_all_vcpus() in that it does not -+ * release the BQL. -+ * -+ * Copyright (c) 2022 Red Hat Inc. -+ * -+ * Author: Emanuele Giuseppe Esposito -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#ifndef ACCEL_BLOCKER_H -+#define ACCEL_BLOCKER_H -+ -+#include "qemu/osdep.h" -+#include "sysemu/cpus.h" -+ -+extern void accel_blocker_init(void); -+ -+/* -+ * accel_{cpu_}ioctl_begin/end: -+ * Mark when ioctl is about to run or just finished. -+ * -+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is -+ * called, preventing new ioctls to run. They will continue only after -+ * accel_ioctl_inibith_end(). -+ */ -+extern void accel_ioctl_begin(void); -+extern void accel_ioctl_end(void); -+extern void accel_cpu_ioctl_begin(CPUState *cpu); -+extern void accel_cpu_ioctl_end(CPUState *cpu); -+ -+/* -+ * accel_ioctl_inhibit_begin: start critical section -+ * -+ * This function makes sure that: -+ * 1) incoming accel_{cpu_}ioctl_begin() calls block -+ * 2) wait that all ioctls that were already running reach -+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. -+ * -+ * This allows the caller to access shared data or perform operations without -+ * worrying of concurrent vcpus accesses. -+ */ -+extern void accel_ioctl_inhibit_begin(void); -+ -+/* -+ * accel_ioctl_inhibit_end: end critical section started by -+ * accel_ioctl_inhibit_begin() -+ * -+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue. 
-+ */ -+extern void accel_ioctl_inhibit_end(void); -+ -+#endif /* ACCEL_BLOCKER_H */ -diff --git a/util/meson.build b/util/meson.build -index 25b9b61f98..85a5504c4d 100644 ---- a/util/meson.build -+++ b/util/meson.build -@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c')) - util_ss.add(files('yank.c')) - util_ss.add(files('int128.c')) - util_ss.add(files('memalign.c')) -+util_ss.add(files('lockcnt.c')) - - if have_user - util_ss.add(files('selfmap.c')) -@@ -71,7 +72,6 @@ endif - if have_block or have_ga - util_ss.add(files('aiocb.c', 'async.c')) - util_ss.add(files('base64.c')) -- util_ss.add(files('lockcnt.c')) - util_ss.add(files('main-loop.c')) - util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) - util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND']))) --- -2.31.1 - diff --git a/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch b/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch deleted file mode 100644 index 0680a26..0000000 --- a/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch +++ /dev/null @@ -1,58 +0,0 @@ -From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 3 Feb 2023 18:15:10 +0100 -Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page - -RH-Author: Eric Auger -RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page -RH-Bugzilla: 2165280 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Gavin Shan -RH-Acked-by: Shaoqin Huang -RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/2165280 -Upstream: yes -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041 -Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEV anymore - -After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization -before registration"), it looks the CPUJumpCache pointer 
can be NULL. -This causes a SIGSEV when running debug-wp-migration kvm unit test. - -At the first place it should be clarified why this TCG code is called -with KVM acceleration. This may hide another bug. - -Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration") -Signed-off-by: Eric Auger -Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com> -Signed-off-by: Richard Henderson -(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860) -Signed-off-by: Eric Auger ---- - accel/tcg/cputlb.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c -index 6f1c00682b..4244b0e4e3 100644 ---- a/accel/tcg/cputlb.c -+++ b/accel/tcg/cputlb.c -@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, - - static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) - { -- int i, i0 = tb_jmp_cache_hash_page(page_addr); - CPUJumpCache *jc = cpu->tb_jmp_cache; -+ int i, i0; - -+ if (unlikely(!jc)) { -+ return; -+ } -+ -+ i0 = tb_jmp_cache_hash_page(page_addr); - for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { - qatomic_set(&jc->array[i0 + i].tb, NULL); - } --- -2.31.1 - diff --git a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch b/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch index a4fb6b1..b937d27 100644 --- a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch +++ b/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch @@ -1,15 +1,16 @@ -From d110c11b5658df93533698fdb428455f5e770866 Mon Sep 17 00:00:00 2001 +From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 18 Apr 2023 11:04:49 +0200 -Subject: [PATCH] acpi: pcihp: allow repeating hot-unplug requests +Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests RH-Author: Igor Mammedov -RH-MergeRequest: 280: acpi: pcihp: allow repeating hot-unplug requests -RH-Bugzilla: 2203745 
+RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests +RH-Bugzilla: 2087047 RH-Acked-by: Ani Sinha -RH-Acked-by: MST RH-Acked-by: Julia Suvorova -RH-Commit: [1/1] e884ac48ebd43c3ebdbc65b01ce5ad75f4cb9284 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam) with Q35 using ACPI PCI hotplug by default, user's request to unplug device is ignored when it's issued before guest OS has been booted. @@ -58,10 +59,10 @@ Signed-off-by: Igor Mammedov 1 file changed, 10 insertions(+) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c -index 84d75e6b84..a2a3738b46 100644 +index dcfb779a7a..cdd6f775a1 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c -@@ -429,6 +429,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, +@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, * acpi_pcihp_eject_slot() when the operation is completed. */ pdev->qdev.pending_deleted_event = true; diff --git a/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch b/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch deleted file mode 100644 index 5ee3270..0000000 --- a/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 244e92fea388d2be9fe81a5c5912d92b8f599caa Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 23 Mar 2023 10:48:59 -0400 -Subject: [PATCH 1/2] aio-posix: fix race between epoll upgrade and - aio_set_fd_handler() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 292: aio-posix: fix race between epoll upgrade and aio_set_fd_handler() -RH-Bugzilla: 2211923 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Paolo Bonzini -RH-Commit: [1/1] 182471bac79fa2b2ae8a34087eb6c4ab1af786e1 - -If another thread calls aio_set_fd_handler() while the IOThread event -loop is upgrading from ppoll(2) to epoll(7) then we might miss 
new -AioHandlers. The epollfd will not monitor the new AioHandler's fd, -resulting in hangs. - -Take the AioHandler list lock while upgrading to epoll. This prevents -AioHandlers from changing while epoll is being set up. If we cannot lock -because we're in a nested event loop, then don't upgrade to epoll (it -will happen next time we're not in a nested call). - -The downside to taking the lock is that the aio_set_fd_handler() thread -has to wait until the epoll upgrade is finished, which involves many -epoll_ctl(2) system calls. However, this scenario is rare and I couldn't -think of another solution that is still simple. - -Reported-by: Qing Wang -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2090998 -Cc: Paolo Bonzini -Cc: Fam Zheng -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230323144859.1338495-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit e62da98527fa35fe5f532cded01a33edf9fbe7b2) -Signed-off-by: Stefan Hajnoczi ---- - util/fdmon-epoll.c | 25 ++++++++++++++++++------- - 1 file changed, 18 insertions(+), 7 deletions(-) - -diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c -index e11a8a022e..1683aa1105 100644 ---- a/util/fdmon-epoll.c -+++ b/util/fdmon-epoll.c -@@ -127,6 +127,8 @@ static bool fdmon_epoll_try_enable(AioContext *ctx) - - bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd) - { -+ bool ok; -+ - if (ctx->epollfd < 0) { - return false; - } -@@ -136,14 +138,23 @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd) - return false; - } - -- if (npfd >= EPOLL_ENABLE_THRESHOLD) { -- if (fdmon_epoll_try_enable(ctx)) { -- return true; -- } else { -- fdmon_epoll_disable(ctx); -- } -+ if (npfd < EPOLL_ENABLE_THRESHOLD) { -+ return false; -+ } -+ -+ /* The list must not change while we add fds to epoll */ -+ if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) { -+ return false; -+ } -+ -+ ok = fdmon_epoll_try_enable(ctx); -+ -+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock); -+ -+ 
if (!ok) { -+ fdmon_epoll_disable(ctx); - } -- return false; -+ return ok; - } - - void fdmon_epoll_setup(AioContext *ctx) --- -2.39.3 - diff --git a/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch b/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch deleted file mode 100644 index ee7e7f9..0000000 --- a/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch +++ /dev/null @@ -1,50 +0,0 @@ -From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit b532526a07ef3b903ead2e055fe6cc87b41057a3 -Author: Paolo Bonzini -Date: Fri Mar 3 11:03:52 2023 +0100 - - aio-wait: switch to smp_mb__after_rmw() - - The barrier comes after an atomic increment, so it is enough to use - smp_mb__after_rmw(); this avoids a double barrier on x86 systems. - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - include/block/aio-wait.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index dd9a7f6461..da13357bb8 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -85,7 +85,7 @@ extern AioWait global_aio_wait; - /* Increment wait_->num_waiters before evaluating cond. */ \ - qatomic_inc(&wait_->num_waiters); \ - /* Paired with smp_mb in aio_wait_kick(). 
*/ \ -- smp_mb(); \ -+ smp_mb__after_rmw(); \ - if (ctx_ && in_aio_context_home_thread(ctx_)) { \ - while ((cond)) { \ - aio_poll(ctx_, true); \ --- -2.39.1 - diff --git a/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch b/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch new file mode 100644 index 0000000..69505f8 --- /dev/null +++ b/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch @@ -0,0 +1,55 @@ +From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 50795ee051a342c681a9b45671c552fbd6274db8 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:13 2023 -0400 + + apic: disable reentrancy detection for apic-msi + + As the code is designed for re-entrant calls to apic-msi, mark apic-msi + as reentrancy-safe. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-9-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/intc/apic.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/intc/apic.c b/hw/intc/apic.c +index 20b5a94073..ac3d47d231 100644 +--- a/hw/intc/apic.c ++++ b/hw/intc/apic.c +@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) + memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", + APIC_SPACE_SIZE); + ++ /* ++ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can ++ * write back to apic-msi. As such mark the apic-msi region re-entrancy ++ * safe. 
++ */ ++ s->io_memory.disable_reentrancy_guard = true; ++ + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); + local_apics[s->id] = s; + +-- +2.39.3 + diff --git a/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch b/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch new file mode 100644 index 0000000..65ba3be --- /dev/null +++ b/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch @@ -0,0 +1,231 @@ +From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 9c86c97f12c060bf7484dd931f38634e166a81f0 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:07 2023 -0400 + + async: Add an optional reentrancy guard to the BH API + + Devices can pass their MemoryReentrancyGuard (from their DeviceState), + when creating new BHes. Then, the async API will toggle the guard + before/after calling the BH call-back. This prevents bh->mmio reentrancy + issues. 
+ + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-3-alxndr@bu.edu> + [thuth: Fix "line over 90 characters" checkpatch.pl error] + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + docs/devel/multiple-iothreads.txt | 7 +++++++ + include/block/aio.h | 18 ++++++++++++++++-- + include/qemu/main-loop.h | 7 +++++-- + tests/unit/ptimer-test-stubs.c | 3 ++- + util/async.c | 18 +++++++++++++++++- + util/main-loop.c | 6 ++++-- + util/trace-events | 1 + + 7 files changed, 52 insertions(+), 8 deletions(-) + +diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt +index 343120f2ef..a3e949f6b3 100644 +--- a/docs/devel/multiple-iothreads.txt ++++ b/docs/devel/multiple-iothreads.txt +@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: + * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier + * LEGACY timer_new_ms() - create a timer + * LEGACY qemu_bh_new() - create a BH ++ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard + * LEGACY qemu_aio_wait() - run an event loop iteration + + Since they implicitly work on the main loop they cannot be used in code that +@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): + * aio_set_event_notifier() - monitor an event notifier + * aio_timer_new() - create a timer + * aio_bh_new() - create a BH ++ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard + * aio_poll() - run an event loop iteration + ++The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" ++argument, which is used to check for and prevent re-entrancy problems. For ++BHs associated with devices, the reentrancy-guard is contained in the ++corresponding DeviceState and named "mem_reentrancy_guard". ++ + The AioContext can be obtained from the IOThread using + iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). 
+ Code that takes an AioContext argument works both in IOThreads or the main +diff --git a/include/block/aio.h b/include/block/aio.h +index 543717f294..db6f23c619 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -23,6 +23,8 @@ + #include "qemu/thread.h" + #include "qemu/timer.h" + #include "block/graph-lock.h" ++#include "hw/qdev-core.h" ++ + + typedef struct BlockAIOCB BlockAIOCB; + typedef void BlockCompletionFunc(void *opaque, int ret); +@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + * is opaque and must be allocated prior to its use. + * + * @name: A human-readable identifier for debugging purposes. ++ * @reentrancy_guard: A guard set when entering a cb to prevent ++ * device-reentrancy issues + */ + QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, +- const char *name); ++ const char *name, MemReentrancyGuard *reentrancy_guard); + + /** + * aio_bh_new: Allocate a new bottom half structure +@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + * string. + */ + #define aio_bh_new(ctx, cb, opaque) \ +- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) ++ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) ++ ++/** ++ * aio_bh_new_guarded: Allocate a new bottom half structure with a ++ * reentrancy_guard ++ * ++ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name ++ * string. ++ */ ++#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ ++ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) + + /** + * aio_notify: Force processing of pending events. 
+diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h +index b3e54e00bc..68e70e61aa 100644 +--- a/include/qemu/main-loop.h ++++ b/include/qemu/main-loop.h +@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); + + /* internal interfaces */ + ++#define qemu_bh_new_guarded(cb, opaque, guard) \ ++ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) + #define qemu_bh_new(cb, opaque) \ +- qemu_bh_new_full((cb), (opaque), (stringify(cb))) +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); ++ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard); + void qemu_bh_schedule_idle(QEMUBH *bh); + + enum { +diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c +index f2bfcede93..8c9407c560 100644 +--- a/tests/unit/ptimer-test-stubs.c ++++ b/tests/unit/ptimer-test-stubs.c +@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) + return deadline; + } + +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard) + { + QEMUBH *bh = g_new(QEMUBH, 1); + +diff --git a/util/async.c b/util/async.c +index 21016a1ac7..a9b528c370 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -65,6 +65,7 @@ struct QEMUBH { + void *opaque; + QSLIST_ENTRY(QEMUBH) next; + unsigned flags; ++ MemReentrancyGuard *reentrancy_guard; + }; + + /* Called concurrently from any thread */ +@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, + } + + QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, +- const char *name) ++ const char *name, MemReentrancyGuard *reentrancy_guard) + { + QEMUBH *bh; + bh = g_new(QEMUBH, 1); +@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc 
*cb, void *opaque, + .cb = cb, + .opaque = opaque, + .name = name, ++ .reentrancy_guard = reentrancy_guard, + }; + return bh; + } + + void aio_bh_call(QEMUBH *bh) + { ++ bool last_engaged_in_io = false; ++ ++ if (bh->reentrancy_guard) { ++ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; ++ if (bh->reentrancy_guard->engaged_in_io) { ++ trace_reentrant_aio(bh->ctx, bh->name); ++ } ++ bh->reentrancy_guard->engaged_in_io = true; ++ } ++ + bh->cb(bh->opaque); ++ ++ if (bh->reentrancy_guard) { ++ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; ++ } + } + + /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ +diff --git a/util/main-loop.c b/util/main-loop.c +index e180c85145..7022f02ef8 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) + + /* Functions to operate on the main QEMU AioContext. */ + +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard) + { +- return aio_bh_new_full(qemu_aio_context, cb, opaque, name); ++ return aio_bh_new_full(qemu_aio_context, cb, opaque, name, ++ reentrancy_guard); + } + + /* +diff --git a/util/trace-events b/util/trace-events +index 16f78d8fe5..3f7e766683 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" + # async.c + aio_co_schedule(void *ctx, void *co) "ctx %p co %p" + aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" ++reentrant_aio(void *ctx, const char *name) "ctx %p name %s" + + # thread-pool.c + thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" +-- +2.39.3 + diff --git a/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch b/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch new file mode 100644 index 0000000..df71fa2 --- /dev/null +++ 
b/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch @@ -0,0 +1,70 @@ +From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 7915bd06f25e1803778081161bf6fa10c42dc7cd +Author: Alexander Bulekov +Date: Mon May 1 10:19:56 2023 -0400 + + async: avoid use-after-free on re-entrancy guard + + A BH callback can free the BH, causing a use-after-free in aio_bh_call. + Fix that by keeping a local copy of the re-entrancy guard pointer. + + Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513 + Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API") + Signed-off-by: Alexander Bulekov + Message-Id: <20230501141956.3444868-1-alxndr@bu.edu> + Reviewed-by: Thomas Huth + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + util/async.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/util/async.c b/util/async.c +index a9b528c370..cd1a1815f9 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh) + { + bool last_engaged_in_io = false; + +- if (bh->reentrancy_guard) { +- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; +- if (bh->reentrancy_guard->engaged_in_io) { ++ /* Make a copy of the guard-pointer as cb may free the bh */ ++ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard; ++ if (reentrancy_guard) { ++ last_engaged_in_io = reentrancy_guard->engaged_in_io; ++ if (reentrancy_guard->engaged_in_io) { + trace_reentrant_aio(bh->ctx, bh->name); + } +- 
bh->reentrancy_guard->engaged_in_io = true; ++ reentrancy_guard->engaged_in_io = true; + } + + bh->cb(bh->opaque); + +- if (bh->reentrancy_guard) { +- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; ++ if (reentrancy_guard) { ++ reentrancy_guard->engaged_in_io = last_engaged_in_io; + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch b/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch deleted file mode 100644 index 0e4a48d..0000000 --- a/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 6229438cca037d42f44a96d38feb15cb102a444f -Author: Paolo Bonzini -Date: Mon Mar 6 10:43:52 2023 +0100 - - async: clarify usage of barriers in the polling case - - Explain that aio_context_notifier_poll() relies on - aio_notify_accept() to catch all the memory writes that were - done before ctx->notified was set to true. 
- - Reviewed-by: Richard Henderson - Reviewed-by: Stefan Hajnoczi - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/async.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/util/async.c b/util/async.c -index 37d3e6036d..e0846baf93 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx) - qatomic_set(&ctx->notified, false); - - /* -- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb -- * in aio_notify. -+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the -+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs -+ * with smp_wmb() in aio_notify. - */ - smp_mb(); - } -@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque) - EventNotifier *e = opaque; - AioContext *ctx = container_of(e, AioContext, notifier); - -+ /* -+ * No need for load-acquire because we just want to kick the -+ * event loop. aio_notify_accept() takes care of synchronizing -+ * the event loop with the producers. 
-+ */ - return qatomic_read(&ctx->notified); - } - --- -2.39.1 - diff --git a/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch b/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch deleted file mode 100644 index cb92dc9..0000000 --- a/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 11/12] async: update documentation of the memory barriers - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 8dd48650b43dfde4ebea34191ac267e474bcc29e -Author: Paolo Bonzini -Date: Mon Mar 6 10:15:06 2023 +0100 - - async: update documentation of the memory barriers - - Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)", - 2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll() - is happening when the bottom half is enqueued in the bh_list; not - when the flags are set. Update the documentation to match. - - Reviewed-by: Stefan Hajnoczi - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/async.c | 33 +++++++++++++++++++-------------- - 1 file changed, 19 insertions(+), 14 deletions(-) - -diff --git a/util/async.c b/util/async.c -index 63434ddae4..37d3e6036d 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) - unsigned old_flags; - - /* -- * The memory barrier implicit in qatomic_fetch_or makes sure that: -- * 1. 
idle & any writes needed by the callback are done before the -- * locations are read in the aio_bh_poll. -- * 2. ctx is loaded before the callback has a chance to execute and bh -- * could be freed. -+ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that -+ * insertion starts after BH_PENDING is set. - */ - old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags); -+ - if (!(old_flags & BH_PENDING)) { -+ /* -+ * At this point the bottom half becomes visible to aio_bh_poll(). -+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in -+ * aio_bh_poll(), ensuring that: -+ * 1. any writes needed by the callback are visible from the callback -+ * after aio_bh_dequeue() returns bh. -+ * 2. ctx is loaded before the callback has a chance to execute and bh -+ * could be freed. -+ */ - QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); - } - -@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) - QSLIST_REMOVE_HEAD(head, next); - - /* -- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory -- * barrier ensures that the callback sees all writes done by the scheduling -- * thread. It also ensures that the scheduling thread sees the cleared -- * flag before bh->cb has run, and thus will call aio_notify again if -- * necessary. -+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that -+ * the removal finishes before BH_PENDING is reset. - */ - *flags = qatomic_fetch_and(&bh->flags, - ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); -@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx) - BHListSlice *s; - int ret = 0; - -+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ - QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); - QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); - -@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx) - void aio_notify(AioContext *ctx) - { - /* -- * Write e.g. bh->flags before writing ctx->notified. 
Pairs with smp_mb in -- * aio_notify_accept. -+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with -+ * smp_mb() in aio_notify_accept(). - */ - smp_wmb(); - qatomic_set(&ctx->notified, true); - - /* -- * Write ctx->notified before reading ctx->notify_me. Pairs -- * with smp_mb in aio_ctx_prepare or aio_poll. -+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me. -+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll. - */ - smp_mb(); - if (qatomic_read(&ctx->notify_me)) { --- -2.39.1 - diff --git a/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch b/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch new file mode 100644 index 0000000..6d9abb8 --- /dev/null +++ b/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch @@ -0,0 +1,57 @@ +From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for + iomem + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:11 2023 -0400 + + bcm2835_property: disable reentrancy detection for iomem + + As the code is designed for re-entrant calls from bcm2835_property to + bcm2835_mbox and back into bcm2835_property, mark iomem as + reentrancy-safe. 
+ + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-7-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/misc/bcm2835_property.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c +index 890ae7bae5..de056ea2df 100644 +--- a/hw/misc/bcm2835_property.c ++++ b/hw/misc/bcm2835_property.c +@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) + + memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, + TYPE_BCM2835_PROPERTY, 0x10); ++ ++ /* ++ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from ++ * iomem. As such, mark iomem as re-entracy safe. ++ */ ++ s->iomem.disable_reentrancy_guard = true; ++ + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); + sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch b/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch deleted file mode 100644 index 04f1dda..0000000 --- a/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch +++ /dev/null @@ -1,250 +0,0 @@ -From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:06 +0100 -Subject: [PATCH 24/31] block: Call drain callbacks only once - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s) - -We only need to call both the BlockDriver's callback and the parent -callbacks when going from undrained to drained or vice versa. A second -drain section doesn't make a difference for the driver or the parent, -they weren't supposed to send new requests before and after the second -drain. 
- -One thing that gets in the way is the 'ignore_bds_parents' parameter in -bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that -bdrv_drain_all_begin() increases bs->quiesce_counter, but does not -quiesce the parent through BdrvChildClass callbacks. If an additional -drain section is started now, bs->quiesce_counter will be non-zero, but -we would still need to quiesce the parent through BdrvChildClass in -order to keep things consistent (and unquiesce it on the matching -bdrv_drained_end(), even though the counter would not reach 0 yet as -long as the bdrv_drain_all() section is still active). - -Instead of keeping track of this, let's just get rid of the parameter. -It was introduced in commit 6cd5c9d7b2d as an optimisation so that -during bdrv_drain_all(), we wouldn't recursively drain all parents up to -the root for each node, resulting in quadratic complexity. As it happens, -calling the callbacks only once solves the same problem, so as of this -patch, we'll still have O(n) complexity and ignore_bds_parents is not -needed any more. - -This patch only ignores the 'ignore_bds_parents' parameter. It will be -removed in a separate patch. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-12-kwolf@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1) -Signed-off-by: Stefano Garzarella ---- - block.c | 25 +++++++------------------ - block/io.c | 30 ++++++++++++++++++------------ - include/block/block_int-common.h | 8 ++++---- - tests/unit/test-bdrv-drain.c | 16 ++++++++++------ - 4 files changed, 39 insertions(+), 40 deletions(-) - -diff --git a/block.c b/block.c -index e0e3b21790..5a583e260d 100644 ---- a/block.c -+++ b/block.c -@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - { - BlockDriverState *old_bs = child->bs; - int new_bs_quiesce_counter; -- int drain_saldo; - - assert(!child->frozen); - assert(old_bs != new_bs); -@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); - } - -- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; -- - /* - * If the new child node is drained but the old one was not, flush - * all outstanding requests to the old child node. - */ -- while (drain_saldo > 0 && child->klass->drained_begin) { -+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -+ if (new_bs_quiesce_counter && !child->quiesced_parent) { - bdrv_parent_drained_begin_single(child, true); -- drain_saldo--; - } - - if (old_bs) { -@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - if (new_bs) { - assert_bdrv_graph_writable(new_bs); - QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); -- -- /* -- * Polling in bdrv_parent_drained_begin_single() may have led to the new -- * node's quiesce_counter having been decreased. 
Not a problem, we just -- * need to recognize this here and then invoke drained_end appropriately -- * more often. -- */ -- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); -- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; -- - if (child->klass->attach) { - child->klass->attach(child); - } -@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - /* - * If the old child node was drained but the new one is not, allow - * requests to come in only after the new node has been attached. -+ * -+ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() -+ * polls, which could have changed the value. - */ -- while (drain_saldo < 0 && child->klass->drained_end) { -+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -+ if (!new_bs_quiesce_counter && child->quiesced_parent) { - bdrv_parent_drained_end_single(child); -- drain_saldo++; - } - } - -diff --git a/block/io.c b/block/io.c -index 75224480d0..87d6f22ec4 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c) - { - IO_OR_GS_CODE(); - -- assert(c->parent_quiesce_counter > 0); -- c->parent_quiesce_counter--; -+ assert(c->quiesced_parent); -+ c->quiesced_parent = false; -+ - if (c->klass->drained_end) { - c->klass->drained_end(c); - } -@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) - { - AioContext *ctx = bdrv_child_get_parent_aio_context(c); - IO_OR_GS_CODE(); -- c->parent_quiesce_counter++; -+ -+ assert(!c->quiesced_parent); -+ c->quiesced_parent = true; -+ - if (c->klass->drained_begin) { - c->klass->drained_begin(c); - } -@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { - aio_disable_external(bdrv_get_aio_context(bs)); -- } - -- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- if (bs->drv && 
bs->drv->bdrv_drain_begin) { -- bs->drv->bdrv_drain_begin(bs); -+ /* TODO Remove ignore_bds_parents, we don't consider it any more */ -+ bdrv_parent_drained_begin(bs, parent, false); -+ if (bs->drv && bs->drv->bdrv_drain_begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } - } - } - -@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- if (bs->drv && bs->drv->bdrv_drain_end) { -- bs->drv->bdrv_drain_end(bs); -- } -- bdrv_parent_drained_end(bs, parent, ignore_bds_parents); -- - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); - if (old_quiesce_counter == 1) { -+ if (bs->drv && bs->drv->bdrv_drain_end) { -+ bs->drv->bdrv_drain_end(bs); -+ } -+ /* TODO Remove ignore_bds_parents, we don't consider it any more */ -+ bdrv_parent_drained_end(bs, parent, false); -+ - aio_enable_external(bdrv_get_aio_context(bs)); - } - } -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 791dddfd7d..a6bc6b7fe9 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -980,13 +980,13 @@ struct BdrvChild { - bool frozen; - - /* -- * How many times the parent of this child has been drained -+ * True if the parent of this child has been drained by this BdrvChild - * (through klass->drained_*). -- * Usually, this is equal to bs->quiesce_counter (potentially -- * reduced by bdrv_drain_all_count). It may differ while the -+ * -+ * It is generally true if bs->quiesce_counter > 0. It may differ while the - * child is entering or leaving a drained section. 
- */ -- int parent_quiesce_counter; -+ bool quiesced_parent; - - QLIST_ENTRY(BdrvChild) next; - QLIST_ENTRY(BdrvChild) next_parent; -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index dda08de8db..172bc6debc 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) - - do_drain_begin(drain_type, bs); - -- g_assert_cmpint(bs->quiesce_counter, ==, 1); -+ if (drain_type == BDRV_DRAIN_ALL) { -+ g_assert_cmpint(bs->quiesce_counter, ==, 2); -+ } else { -+ g_assert_cmpint(bs->quiesce_counter, ==, 1); -+ } - g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); - - do_drain_end(drain_type, bs); -@@ -348,8 +352,8 @@ static void test_nested(void) - - for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { - for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { -- int backing_quiesce = (outer != BDRV_DRAIN) + -- (inner != BDRV_DRAIN); -+ int backing_quiesce = (outer == BDRV_DRAIN_ALL) + -+ (inner == BDRV_DRAIN_ALL); - - g_assert_cmpint(bs->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); -@@ -359,10 +363,10 @@ static void test_nested(void) - do_drain_begin(outer, bs); - do_drain_begin(inner, bs); - -- g_assert_cmpint(bs->quiesce_counter, ==, 2); -+ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce); - g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); -- g_assert_cmpint(s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); -+ g_assert_cmpint(s->drain_count, ==, 1); -+ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce); - - do_drain_end(inner, bs); - do_drain_end(outer, bs); --- -2.31.1 - diff --git a/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch b/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch new file mode 100644 index 0000000..6de5d65 --- /dev/null +++ 
b/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch @@ -0,0 +1,354 @@ +From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:16 +0200 +Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s) + +When processing vectored guest requests that are not aligned to the +storage request alignment, we pad them by adding head and/or tail +buffers for a read-modify-write cycle. + +The guest can submit I/O vectors up to IOV_MAX (1024) in length, but +with this padding, the vector can exceed that limit. As of +4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make +qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the +limit, instead returning an error to the guest. + +To the guest, this appears as a random I/O error. We should not return +an I/O error to the guest when it issued a perfectly valid request. + +Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector +longer than IOV_MAX, which generally seems to work (because the guest +assumes a smaller alignment than we really have, file-posix's +raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and +so emulate the request, so that the IOV_MAX does not matter). However, +that does not seem exactly great. + +I see two ways to fix this problem: +1. We split such long requests into two requests. +2. We join some elements of the vector into new buffers to make it + shorter. + +I am wary of (1), because it seems like it may have unintended side +effects. + +(2) on the other hand seems relatively simple to implement, with +hopefully few side effects, so this patch does that. 
+ +To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request() +is effectively replaced by the new function bdrv_create_padded_qiov(), +which not only wraps the request IOV with padding head/tail, but also +ensures that the resulting vector will not have more than IOV_MAX +elements. Putting that functionality into qemu_iovec_init_extended() is +infeasible because it requires allocating a bounce buffer; doing so +would require many more parameters (buffer alignment, how to initialize +the buffer, and out parameters like the buffer, its length, and the +original elements), which is not reasonable. + +Conversely, it is not difficult to move qemu_iovec_init_extended()'s +functionality into bdrv_create_padded_qiov() by using public +qemu_iovec_* functions, so that is what this patch does. + +Because bdrv_pad_request() was the only "serious" user of +qemu_iovec_init_extended(), the next patch will remove the latter +function, so the functionality is not implemented twice. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964 +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-3-hreitz@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a) +Signed-off-by: Hanna Czenczek +--- + block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 151 insertions(+), 15 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 2e267a85ab..4e8e90208b 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1439,6 +1439,14 @@ out: + * @merge_reads is true for small requests, + * if @buf_len == @head + bytes + @tail. In this case it is possible that both + * head and tail exist but @buf_len == align and @tail_buf == @buf. ++ * ++ * @write is true for write requests, false for read requests. ++ * ++ * If padding makes the vector too long (exceeding IOV_MAX), then we need to ++ * merge existing vector elements into a single one. 
@collapse_bounce_buf acts ++ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse ++ * I/O vector elements so for read requests, the data can be copied back after ++ * the read is done. + */ + typedef struct BdrvRequestPadding { + uint8_t *buf; +@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding { + size_t head; + size_t tail; + bool merge_reads; ++ bool write; + QEMUIOVector local_qiov; ++ ++ uint8_t *collapse_bounce_buf; ++ size_t collapse_len; ++ QEMUIOVector pre_collapse_qiov; + } BdrvRequestPadding; + + static bool bdrv_init_padding(BlockDriverState *bs, + int64_t offset, int64_t bytes, ++ bool write, + BdrvRequestPadding *pad) + { + int64_t align = bs->bl.request_alignment; +@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs, + pad->tail_buf = pad->buf + pad->buf_len - align; + } + ++ pad->write = write; ++ + return true; + } + +@@ -1547,8 +1563,23 @@ zero_mem: + return 0; + } + +-static void bdrv_padding_destroy(BdrvRequestPadding *pad) ++/** ++ * Free *pad's associated buffers, and perform any necessary finalization steps. ++ */ ++static void bdrv_padding_finalize(BdrvRequestPadding *pad) + { ++ if (pad->collapse_bounce_buf) { ++ if (!pad->write) { ++ /* ++ * If padding required elements in the vector to be collapsed into a ++ * bounce buffer, copy the bounce buffer content back ++ */ ++ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ qemu_vfree(pad->collapse_bounce_buf); ++ qemu_iovec_destroy(&pad->pre_collapse_qiov); ++ } + if (pad->buf) { + qemu_vfree(pad->buf); + qemu_iovec_destroy(&pad->local_qiov); +@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + memset(pad, 0, sizeof(*pad)); + } + ++/* ++ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while ++ * ensuring that the resulting vector will not exceed IOV_MAX elements. 
++ * ++ * To ensure this, when necessary, the first two or three elements of @iov are ++ * merged into pad->collapse_bounce_buf and replaced by a reference to that ++ * bounce buffer in pad->local_qiov. ++ * ++ * After performing a read request, the data from the bounce buffer must be ++ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()). ++ */ ++static int bdrv_create_padded_qiov(BlockDriverState *bs, ++ BdrvRequestPadding *pad, ++ struct iovec *iov, int niov, ++ size_t iov_offset, size_t bytes) ++{ ++ int padded_niov, surplus_count, collapse_count; ++ ++ /* Assert this invariant */ ++ assert(niov <= IOV_MAX); ++ ++ /* ++ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error ++ * to the guest is not ideal, but there is little else we can do. At least ++ * this will practically never happen on 64-bit systems. ++ */ ++ if (SIZE_MAX - pad->head < bytes || ++ SIZE_MAX - pad->head - bytes < pad->tail) ++ { ++ return -EINVAL; ++ } ++ ++ /* Length of the resulting IOV if we just concatenated everything */ ++ padded_niov = !!pad->head + niov + !!pad->tail; ++ ++ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX)); ++ ++ if (pad->head) { ++ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head); ++ } ++ ++ /* ++ * If padded_niov > IOV_MAX, we cannot just concatenate everything. ++ * Instead, merge the first two or three elements of @iov to reduce the ++ * number of vector elements as necessary. ++ */ ++ if (padded_niov > IOV_MAX) { ++ /* ++ * Only head and tail can have lead to the number of entries exceeding ++ * IOV_MAX, so we can exceed it by the head and tail at most. We need ++ * to reduce the number of elements by `surplus_count`, so we merge that ++ * many elements plus one into one element. 
++ */ ++ surplus_count = padded_niov - IOV_MAX; ++ assert(surplus_count <= !!pad->head + !!pad->tail); ++ collapse_count = surplus_count + 1; ++ ++ /* ++ * Move the elements to collapse into `pad->pre_collapse_qiov`, then ++ * advance `iov` (and associated variables) by those elements. ++ */ ++ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count); ++ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov, ++ collapse_count, iov_offset, SIZE_MAX); ++ iov += collapse_count; ++ iov_offset = 0; ++ niov -= collapse_count; ++ bytes -= pad->pre_collapse_qiov.size; ++ ++ /* ++ * Construct the bounce buffer to match the length of the to-collapse ++ * vector elements, and for write requests, initialize it with the data ++ * from those elements. Then add it to `pad->local_qiov`. ++ */ ++ pad->collapse_len = pad->pre_collapse_qiov.size; ++ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len); ++ if (pad->write) { ++ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ qemu_iovec_add(&pad->local_qiov, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ ++ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes); ++ ++ if (pad->tail) { ++ qemu_iovec_add(&pad->local_qiov, ++ pad->buf + pad->buf_len - pad->tail, pad->tail); ++ } ++ ++ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX)); ++ return 0; ++} ++ + /* + * bdrv_pad_request + * +@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + * read of padding, bdrv_padding_rmw_read() should be called separately if + * needed. + * ++ * @write is true for write requests, false for read requests. 
++ * + * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out: + * - on function start they represent original request + * - on failure or when padding is not needed they are unchanged +@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + static int bdrv_pad_request(BlockDriverState *bs, + QEMUIOVector **qiov, size_t *qiov_offset, + int64_t *offset, int64_t *bytes, ++ bool write, + BdrvRequestPadding *pad, bool *padded, + BdrvRequestFlags *flags) + { + int ret; ++ struct iovec *sliced_iov; ++ int sliced_niov; ++ size_t sliced_head, sliced_tail; + + bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); + +- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { ++ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { + if (padded) { + *padded = false; + } + return 0; + } + +- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, +- *qiov, *qiov_offset, *bytes, +- pad->buf + pad->buf_len - pad->tail, +- pad->tail); ++ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, ++ &sliced_head, &sliced_tail, ++ &sliced_niov); ++ ++ /* Guaranteed by bdrv_check_qiov_request() */ ++ assert(*bytes <= SIZE_MAX); ++ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, ++ sliced_head, *bytes); + if (ret < 0) { +- bdrv_padding_destroy(pad); ++ bdrv_padding_finalize(pad); + return ret; + } + *bytes += pad->head + pad->tail; +@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + flags |= BDRV_REQ_COPY_ON_READ; + } + +- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, +- NULL, &flags); ++ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false, ++ &pad, NULL, &flags); + if (ret < 0) { + goto fail; + } +@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + bs->bl.request_alignment, + qiov, qiov_offset, flags); + tracked_request_end(&req); +- bdrv_padding_destroy(&pad); ++ 
bdrv_padding_finalize(&pad); + + fail: + bdrv_dec_in_flight(bs); +@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, + /* This flag doesn't make sense for padding or zero writes */ + flags &= ~BDRV_REQ_REGISTERED_BUF; + +- padding = bdrv_init_padding(bs, offset, bytes, &pad); ++ padding = bdrv_init_padding(bs, offset, bytes, true, &pad); + if (padding) { + assert(!(flags & BDRV_REQ_NO_WAIT)); + bdrv_make_request_serialising(req, align); +@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, + } + + out: +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + return ret; + } +@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do + * alignment only if there is no ZERO flag. + */ +- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, +- &padded, &flags); ++ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true, ++ &pad, &padded, &flags); + if (ret < 0) { + return ret; + } +@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, + qiov, qiov_offset, flags); + +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + out: + tracked_request_end(&req); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch b/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch new file mode 100644 index 0000000..fbab82d --- /dev/null +++ b/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch @@ -0,0 +1,56 @@ +From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 4 May 2023 13:57:34 +0200 +Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in + qmp_block_resize() + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() 
+RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm) + +This QMP handler runs in a coroutine, so it must use the corresponding +no_co_wrappers instead. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688 +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-Id: <20230504115750.54437-5-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888) +Signed-off-by: Kevin Wolf +--- + blockdev.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index d7b5c18f0a..eb509cf964 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, + return; + } + +- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); ++ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); + if (!blk) { + return; + } +@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, + + bdrv_co_lock(bs); + bdrv_drained_end(bs); +- blk_unref(blk); ++ blk_co_unref(blk); + bdrv_co_unlock(bs); + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch b/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch deleted file mode 100644 index 80018cc..0000000 --- a/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch +++ /dev/null @@ -1,298 +0,0 @@ -From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:09 +0100 -Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during 
snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s) - -In order to make sure that bdrv_replace_child_noperm() doesn't have to -poll any more, get rid of the bdrv_parent_drained_begin_single() call. - -This is possible now because we can require that the parent is already -drained through the child in question when the function is called and we -don't call the parent drain callbacks more than once. - -The additional drain calls needed in callers cause the test case to run -its code in the drain handler too early (bdrv_attach_child() drains -now), so modify it to only enable the code after the test setup has -completed. - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-15-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4) -Signed-off-by: Stefano Garzarella ---- - block.c | 103 ++++++++++++++++++++++++++++++----- - block/io.c | 2 +- - include/block/block-io.h | 8 +++ - tests/unit/test-bdrv-drain.c | 10 ++++ - 4 files changed, 108 insertions(+), 15 deletions(-) - -diff --git a/block.c b/block.c -index af31a94863..65588d313a 100644 ---- a/block.c -+++ b/block.c -@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque) - - GLOBAL_STATE_CODE(); - /* old_bs reference is transparently moved from @s to @s->child */ -+ if (!s->child->bs) { -+ /* -+ * The parents were undrained when removing old_bs from the child. New -+ * requests can't have been made, though, because the child was empty. -+ * -+ * TODO Make bdrv_replace_child_noperm() transactionable to avoid -+ * undraining the parent in the first place. Once this is done, having -+ * new_bs drained when calling bdrv_replace_child_tran() is not a -+ * requirement any more. 
-+ */ -+ bdrv_parent_drained_begin_single(s->child, false); -+ assert(!bdrv_parent_drained_poll_single(s->child)); -+ } -+ assert(s->child->quiesced_parent); - bdrv_replace_child_noperm(s->child, s->old_bs); - bdrv_unref(new_bs); - } -@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = { - * - * Note: real unref of old_bs is done only on commit. - * -+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be -+ * kept drained until the transaction is completed. -+ * - * The function doesn't update permissions, caller is responsible for this. - */ - static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, - Transaction *tran) - { - BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); -+ -+ assert(child->quiesced_parent); -+ assert(!new_bs || new_bs->quiesce_counter); -+ - *s = (BdrvReplaceChildState) { - .child = child, - .old_bs = child->bs, -@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) - return permissions[qapi_perm]; - } - -+/* -+ * Replaces the node that a BdrvChild points to without updating permissions. -+ * -+ * If @new_bs is non-NULL, the parent of @child must already be drained through -+ * @child. -+ * -+ * This function does not poll. -+ */ - static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs) - { -@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - int new_bs_quiesce_counter; - - assert(!child->frozen); -+ -+ /* -+ * If we want to change the BdrvChild to point to a drained node as its new -+ * child->bs, we need to make sure that its new parent is drained, too. 
In -+ * other words, either child->quiesce_parent must already be true or we must -+ * be able to set it and keep the parent's quiesce_counter consistent with -+ * that, but without polling or starting new requests (this function -+ * guarantees that it doesn't poll, and starting new requests would be -+ * against the invariants of drain sections). -+ * -+ * To keep things simple, we pick the first option (child->quiesce_parent -+ * must already be true). We also generalise the rule a bit to make it -+ * easier to verify in callers and more likely to be covered in test cases: -+ * The parent must be quiesced through this child even if new_bs isn't -+ * currently drained. -+ * -+ * The only exception is for callers that always pass new_bs == NULL. In -+ * this case, we obviously never need to consider the case of a drained -+ * new_bs, so we can keep the callers simpler by allowing them not to drain -+ * the parent. -+ */ -+ assert(!new_bs || child->quiesced_parent); - assert(old_bs != new_bs); - GLOBAL_STATE_CODE(); - -@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); - } - -- /* -- * If the new child node is drained but the old one was not, flush -- * all outstanding requests to the old child node. -- */ -- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -- if (new_bs_quiesce_counter && !child->quiesced_parent) { -- bdrv_parent_drained_begin_single(child, true); -- } -- - if (old_bs) { - if (child->klass->detach) { - child->klass->detach(child); -@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - } - - /* -- * If the old child node was drained but the new one is not, allow -- * requests to come in only after the new node has been attached. -- * -- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() -- * polls, which could have changed the value. 
-+ * If the parent was drained through this BdrvChild previously, but new_bs -+ * is not drained, allow requests to come in only after the new node has -+ * been attached. - */ - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); - if (!new_bs_quiesce_counter && child->quiesced_parent) { -@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, - } - - bdrv_ref(child_bs); -+ /* -+ * Let every new BdrvChild start with a drained parent. Inserting the child -+ * in the graph with bdrv_replace_child_noperm() will undrain it if -+ * @child_bs is not drained. -+ * -+ * The child was only just created and is not yet visible in global state -+ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody -+ * could have sent requests and polling is not necessary. -+ * -+ * Note that this means that the parent isn't fully drained yet, we only -+ * stop new requests from coming in. This is fine, we don't care about the -+ * old requests here, they are not for this child. If another place enters a -+ * drain section for the same parent, but wants it to be fully quiesced, it -+ * will not run most of the the code in .drained_begin() again (which is not -+ * a problem, we already did this), but it will still poll until the parent -+ * is fully quiesced, so it will not be negatively affected either. 
-+ */ -+ bdrv_parent_drained_begin_single(new_child, false); - bdrv_replace_child_noperm(new_child, child_bs); - - BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); -@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) - } - - if (child->bs) { -+ BlockDriverState *bs = child->bs; -+ bdrv_drained_begin(bs); - bdrv_replace_child_tran(child, NULL, tran); -+ bdrv_drained_end(bs); - } - - tran_add(tran, &bdrv_remove_child_drv, child); - } - -+static void undrain_on_clean_cb(void *opaque) -+{ -+ bdrv_drained_end(opaque); -+} -+ -+static TransactionActionDrv undrain_on_clean = { -+ .clean = undrain_on_clean_cb, -+}; -+ - static int bdrv_replace_node_noperm(BlockDriverState *from, - BlockDriverState *to, - bool auto_skip, Transaction *tran, -@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, - - GLOBAL_STATE_CODE(); - -+ bdrv_drained_begin(from); -+ bdrv_drained_begin(to); -+ tran_add(tran, &undrain_on_clean, from); -+ tran_add(tran, &undrain_on_clean, to); -+ - QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { - assert(c->bs == from); - if (!should_update_child(c, to)) { -diff --git a/block/io.c b/block/io.c -index 5e9150d92c..ae64830eac 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) - } - } - --static bool bdrv_parent_drained_poll_single(BdrvChild *c) -+bool bdrv_parent_drained_poll_single(BdrvChild *c) - { - if (c->klass->drained_poll) { - return c->klass->drained_poll(c); -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 8f5e75756a..65e6d2569b 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); - */ - void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - -+/** -+ * bdrv_parent_drained_poll_single: -+ * -+ * Returns true if there is 
any pending activity to cease before @c can be -+ * called quiesced, false otherwise. -+ */ -+bool bdrv_parent_drained_poll_single(BdrvChild *c); -+ - /** - * bdrv_parent_drained_end_single: - * -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 172bc6debc..2686a8acee 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void) - - - typedef struct BDRVReplaceTestState { -+ bool setup_completed; - bool was_drained; - bool was_undrained; - bool has_read; -@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -+ if (!s->setup_completed) { -+ return; -+ } -+ - if (!s->drain_count) { - s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); - bdrv_inc_in_flight(bs); -@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -+ if (!s->setup_completed) { -+ return; -+ } -+ - g_assert(s->drain_count > 0); - if (!--s->drain_count) { - s->was_undrained = true; -@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count, - bdrv_ref(old_child_bs); - bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, - BDRV_CHILD_COW, &error_abort); -+ parent_s->setup_completed = true; - - for (i = 0; i < old_drain_count; i++) { - bdrv_drained_begin(old_child_bs); --- -2.31.1 - diff --git a/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch b/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch deleted file mode 100644 index e3bf1e2..0000000 --- a/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:03 +0100 -Subject: [PATCH 21/31] block: Don't use subtree drains in - 
bdrv_drop_intermediate() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s) - -Instead of using a subtree drain from the top node (which also drains -child nodes of base that we're not even interested in), use a normal -drain for base, which automatically drains all of the parents, too. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-9-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index cb5e96b1cf..b3449a312e 100644 ---- a/block.c -+++ b/block.c -@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - GLOBAL_STATE_CODE(); - - bdrv_ref(top); -- bdrv_subtree_drained_begin(top); -+ bdrv_drained_begin(base); - - if (!top->drv || !base->drv) { - goto exit; -@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - - ret = 0; - exit: -- bdrv_subtree_drained_end(top); -+ bdrv_drained_end(base); - bdrv_unref(top); - return ret; - } --- -2.31.1 - diff --git a/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch b/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch deleted file mode 100644 index 24661fb..0000000 --- a/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch +++ /dev/null @@ -1,157 +0,0 @@ -From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:02 +0100 -Subject: [PATCH 20/31] block: Drain individual nodes during reopen - 
-RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s) - -bdrv_reopen() and friends use subtree drains as a lazy way of covering -all the nodes they touch. Turns out that this lazy way is a lot more -complicated than just draining the nodes individually, even not -accounting for the additional complexity in the drain mechanism itself. - -Simplify the code by switching to draining the individual nodes that are -already managed in the BlockReopenQueue anyway. - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-8-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8) -Signed-off-by: Stefano Garzarella ---- - block.c | 16 +++++++++------- - block/replication.c | 6 ------ - blockdev.c | 13 ------------- - 3 files changed, 9 insertions(+), 26 deletions(-) - -diff --git a/block.c b/block.c -index 46df410b07..cb5e96b1cf 100644 ---- a/block.c -+++ b/block.c -@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - * returns a pointer to bs_queue, which is either the newly allocated - * bs_queue, or the existing bs_queue being used. - * -- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). -+ * bs is drained here and undrained by bdrv_reopen_queue_free(). - * - * To be called with bs->aio_context locked. - */ -@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - int flags; - QemuOpts *opts; - -- /* Make sure that the caller remembered to use a drained section. 
This is -- * important to avoid graph changes between the recursive queuing here and -- * bdrv_reopen_multiple(). */ -- assert(bs->quiesce_counter > 0); - GLOBAL_STATE_CODE(); - -+ bdrv_drained_begin(bs); -+ - if (bs_queue == NULL) { - bs_queue = g_new0(BlockReopenQueue, 1); - QTAILQ_INIT(bs_queue); -@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) - if (bs_queue) { - BlockReopenQueueEntry *bs_entry, *next; - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { -+ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); -+ -+ aio_context_acquire(ctx); -+ bdrv_drained_end(bs_entry->state.bs); -+ aio_context_release(ctx); -+ - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); - g_free(bs_entry); -@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - - GLOBAL_STATE_CODE(); - -- bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); - - if (ctx != qemu_get_aio_context()) { -@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - if (ctx != qemu_get_aio_context()) { - aio_context_acquire(ctx); - } -- bdrv_subtree_drained_end(bs); - - return ret; - } -diff --git a/block/replication.c b/block/replication.c -index f1eed25e43..c62f48a874 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, - s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); - } - -- bdrv_subtree_drained_begin(hidden_disk->bs); -- bdrv_subtree_drained_begin(secondary_disk->bs); -- - if (s->orig_hidden_read_only) { - QDict *opts = qdict_new(); - qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); -@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, - aio_context_acquire(ctx); - } - } -- -- bdrv_subtree_drained_end(hidden_disk->bs); -- 
bdrv_subtree_drained_end(secondary_disk->bs); - } - - static void backup_job_cleanup(BlockDriverState *bs) -diff --git a/blockdev.c b/blockdev.c -index 3f1dec6242..8ffb3d9537 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3547,8 +3547,6 @@ fail: - void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - { - BlockReopenQueue *queue = NULL; -- GSList *drained = NULL; -- GSList *p; - - /* Add each one of the BDS that we want to reopen to the queue */ - for (; reopen_list != NULL; reopen_list = reopen_list->next) { -@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - ctx = bdrv_get_aio_context(bs); - aio_context_acquire(ctx); - -- bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(queue, bs, qdict, false); -- drained = g_slist_prepend(drained, bs); - - aio_context_release(ctx); - } -@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - - fail: - bdrv_reopen_queue_free(queue); -- for (p = drained; p; p = p->next) { -- BlockDriverState *bs = p->data; -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- aio_context_acquire(ctx); -- bdrv_subtree_drained_end(bs); -- aio_context_release(ctx); -- } -- g_slist_free(drained); - } - - void qmp_blockdev_del(const char *node_name, Error **errp) --- -2.31.1 - diff --git a/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch b/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch deleted file mode 100644 index 1ae73c7..0000000 --- a/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:08 +0100 -Subject: [PATCH 26/31] block: Drop out of coroutine in - bdrv_do_drained_begin_quiesce() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 
2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s) - -The next patch adds a parent drain to bdrv_attach_child_common(), which -shouldn't be, but is currently called from coroutines in some cases (e.g. -.bdrv_co_create implementations generally open new nodes). Therefore, -the assertion that we're not in a coroutine doesn't hold true any more. - -We could just remove the assertion because there is nothing in the -function that should be in conflict with running in a coroutine, but -just to be on the safe side, we can reverse the caller relationship -between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so -that the latter also just drops out of coroutine context and we can -still be certain in the future that any drain code doesn't run in -coroutines. - -As a nice side effect, the structure of bdrv_do_drained_begin() is now -symmetrical with bdrv_do_drained_end(). 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-14-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553) -Signed-off-by: Stefano Garzarella ---- - block/io.c | 25 ++++++++++++------------- - 1 file changed, 12 insertions(+), 13 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 2e9503df6a..5e9150d92c 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - } - } - --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool poll) - { - IO_OR_GS_CODE(); -- assert(!qemu_in_coroutine()); -+ -+ if (qemu_in_coroutine()) { -+ bdrv_co_yield_to_drain(bs, true, parent, poll); -+ return; -+ } - - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { -@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) - bs->drv->bdrv_drain_begin(bs); - } - } --} -- --static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool poll) --{ -- if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, parent, poll); -- return; -- } -- -- bdrv_do_drained_begin_quiesce(bs, parent); - - /* - * Wait for drained requests to finish. 
-@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - } - } - -+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) -+{ -+ bdrv_do_drained_begin(bs, parent, false); -+} -+ - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); --- -2.31.1 - diff --git a/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch b/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch deleted file mode 100644 index b73b8fe..0000000 --- a/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch +++ /dev/null @@ -1,67 +0,0 @@ -From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:01 +0100 -Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s) - -Callers don't agree whether bdrv_reopen_queue_child() should be called -with the AioContext lock held or not. Standardise on holding the lock -(as done by QMP blockdev-reopen and the replication block driver) and -fix bdrv_reopen() to do the same. - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-7-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814) -Signed-off-by: Stefano Garzarella ---- - block.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index 7999fd08c5..46df410b07 100644 ---- a/block.c -+++ b/block.c -@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - * bs_queue, or the existing bs_queue being used. 
- * - * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). -+ * -+ * To be called with bs->aio_context locked. - */ - static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - BlockDriverState *bs, -@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - return bs_queue; - } - -+/* To be called with bs->aio_context locked */ - BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState *bs, - QDict *options, bool keep_old_opts) -@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - GLOBAL_STATE_CODE(); - - bdrv_subtree_drained_begin(bs); -+ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); -+ - if (ctx != qemu_get_aio_context()) { - aio_context_release(ctx); - } -- -- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); - ret = bdrv_reopen_multiple(queue, errp); - - if (ctx != qemu_get_aio_context()) { --- -2.31.1 - diff --git a/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch b/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch new file mode 100644 index 0000000..c0ab8c2 --- /dev/null +++ b/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch @@ -0,0 +1,73 @@ +From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 14 Jul 2023 10:59:38 +0200 +Subject: [PATCH 5/9] block: Fix pad_request's request restriction + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s) + +bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX, +which bdrv_check_qiov_request() does not guarantee. 
+ +bdrv_check_request32() however will guarantee this, and both of +bdrv_pad_request()'s callers (bdrv_co_preadv_part() and +bdrv_co_pwritev_part()) already run it before calling +bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call +bdrv_check_request32() without expecting error, too. + +In effect, this patch will not change guest-visible behavior. It is a +clean-up to tighten a condition to match what is guaranteed by our +callers, and which exists purely to show clearly why the subsequent +assertion (`assert(*bytes <= SIZE_MAX)`) is always true. + +Note there is a difference between the interfaces of +bdrv_check_qiov_request() and bdrv_check_request32(): The former takes +an errp, the latter does not, so we can no longer just pass +&error_abort. Instead, we need to check the returned value. While we +do expect success (because the callers have already run this function), +an assert(ret == 0) is not much simpler than just to return an error if +it occurs, so let us handle errors by returning them up the stack now. 
+ +Reported-by: Peter Maydell +Signed-off-by: Hanna Czenczek +Message-id: 20230714085938.202730-1-hreitz@redhat.com +Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a + ("block: Collapse padded I/O vecs exceeding IOV_MAX") +Signed-off-by: Hanna Czenczek +Signed-off-by: Stefan Hajnoczi +--- + block/io.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 4e8e90208b..807c9fb720 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs, + int sliced_niov; + size_t sliced_head, sliced_tail; + +- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); ++ /* Should have been checked by the caller already */ ++ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset); ++ if (ret < 0) { ++ return ret; ++ } + + if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { + if (padded) { +@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs, + &sliced_head, &sliced_tail, + &sliced_niov); + +- /* Guaranteed by bdrv_check_qiov_request() */ ++ /* Guaranteed by bdrv_check_request32() */ + assert(*bytes <= SIZE_MAX); + ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, + sliced_head, *bytes); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch b/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch deleted file mode 100644 index 5b54210..0000000 --- a/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:53 +0200 -Subject: [PATCH 06/20] block: Improve empty format-specific info dump - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: 
[1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s) - -When a block driver supports obtaining format-specific information, but -that object only contains optional fields, it is possible that none of -them are present, so that dump_qobject() (called by -bdrv_image_info_specific_dump()) will not print anything. - -The callers of bdrv_image_info_specific_dump() put a header above this -information ("Format specific information:\n"), which will look strange -when there is nothing below. Modify bdrv_image_info_specific_dump() to -print this header instead of its callers, and only if there is indeed -something to be printed. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-2-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++---- - include/block/qapi.h | 3 ++- - qemu-io-cmds.c | 4 ++-- - 3 files changed, 41 insertions(+), 7 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index cf557e3aea..51202b470a 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict) - } - } - --void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) -+/* -+ * Return whether dumping the given QObject with dump_qobject() would -+ * yield an empty dump, i.e. not print anything. -+ */ -+static bool qobject_is_empty_dump(const QObject *obj) -+{ -+ switch (qobject_type(obj)) { -+ case QTYPE_QNUM: -+ case QTYPE_QSTRING: -+ case QTYPE_QBOOL: -+ return false; -+ -+ case QTYPE_QDICT: -+ return qdict_size(qobject_to(QDict, obj)) == 0; -+ -+ case QTYPE_QLIST: -+ return qlist_empty(qobject_to(QList, obj)); -+ -+ default: -+ abort(); -+ } -+} -+ -+/** -+ * Dumps the given ImageInfoSpecific object in a human-readable form, -+ * prepending an optional prefix if the dump is not empty. 
-+ */ -+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -+ const char *prefix) - { - QObject *obj, *data; - Visitor *v = qobject_output_visitor_new(&obj); -@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) - visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort); - visit_complete(v, &obj); - data = qdict_get(qobject_to(QDict, obj), "data"); -- dump_qobject(1, data); -+ if (!qobject_is_empty_dump(data)) { -+ if (prefix) { -+ qemu_printf("%s", prefix); -+ } -+ dump_qobject(1, data); -+ } - qobject_unref(obj); - visit_free(v); - } -@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info) - } - - if (info->has_format_specific) { -- qemu_printf("Format specific information:\n"); -- bdrv_image_info_specific_dump(info->format_specific); -+ bdrv_image_info_specific_dump(info->format_specific, -+ "Format specific information:\n"); - } - } -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 22c7807c89..c09859ea78 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs, - Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); --void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec); -+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -+ const char *prefix); - void bdrv_image_info_dump(ImageInfo *info); - #endif -diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c -index 952dc940f1..f4a374528e 100644 ---- a/qemu-io-cmds.c -+++ b/qemu-io-cmds.c -@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) - return -EIO; - } - if (spec_info) { -- printf("Format specific information:\n"); -- bdrv_image_info_specific_dump(spec_info); -+ bdrv_image_info_specific_dump(spec_info, -+ "Format specific information:\n"); - qapi_free_ImageInfoSpecific(spec_info); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch 
b/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch deleted file mode 100644 index 07160dc..0000000 --- a/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:00 +0100 -Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s) - -bdrv_drain_invoke() has now two entirely separate cases that share no -code any more and are selected depending on a bool parameter. Each case -has only one caller. Just inline the function. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-6-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121) -Signed-off-by: Stefano Garzarella ---- - block/io.c | 23 ++++++----------------- - 1 file changed, 6 insertions(+), 17 deletions(-) - -diff --git a/block/io.c b/block/io.c -index f4ca62b034..a25103be6f 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -242,21 +242,6 @@ typedef struct { - bool ignore_bds_parents; - } BdrvCoDrainData; - --/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ --static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) --{ -- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || -- (!begin && !bs->drv->bdrv_drain_end)) { -- return; -- } -- -- if (begin) { -- bs->drv->bdrv_drain_begin(bs); -- } else { -- bs->drv->bdrv_drain_end(bs); -- } --} -- - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ - 
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents) -@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- bdrv_drain_invoke(bs, true); -+ if (bs->drv && bs->drv->bdrv_drain_begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- bdrv_drain_invoke(bs, false); -+ if (bs->drv && bs->drv->bdrv_drain_end) { -+ bs->drv->bdrv_drain_end(bs); -+ } - bdrv_parent_drained_end(bs, parent, ignore_bds_parents); - - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); --- -2.31.1 - diff --git a/SOURCES/kvm-block-Remove-drained_end_counter.patch b/SOURCES/kvm-block-Remove-drained_end_counter.patch deleted file mode 100644 index cfafc33..0000000 --- a/SOURCES/kvm-block-Remove-drained_end_counter.patch +++ /dev/null @@ -1,433 +0,0 @@ -From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:59 +0100 -Subject: [PATCH 17/31] block: Remove drained_end_counter - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s) - -drained_end_counter is unused now, nobody changes its value any more. It -can be removed. - -In cases where we had two almost identical functions that only differed -in whether the caller passes drained_end_counter, or whether they would -poll for a local drained_end_counter to reach 0, these become a single -function. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Message-Id: <20221118174110.55183-5-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f) -Signed-off-by: Stefano Garzarella ---- - block.c | 5 +- - block/block-backend.c | 4 +- - block/io.c | 98 ++++++++------------------------ - blockjob.c | 2 +- - include/block/block-io.h | 24 -------- - include/block/block_int-common.h | 6 +- - 6 files changed, 30 insertions(+), 109 deletions(-) - -diff --git a/block.c b/block.c -index 16a62a329c..7999fd08c5 100644 ---- a/block.c -+++ b/block.c -@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) - return bdrv_drain_poll(bs, false, NULL, false); - } - --static void bdrv_child_cb_drained_end(BdrvChild *child, -- int *drained_end_counter) -+static void bdrv_child_cb_drained_end(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- bdrv_drained_end_no_poll(bs, drained_end_counter); -+ bdrv_drained_end(bs); - } - - static int bdrv_child_cb_inactivate(BdrvChild *child) -diff --git a/block/block-backend.c b/block/block-backend.c -index d98a96ff37..feaf2181fa 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, - } - static void blk_root_drained_begin(BdrvChild *child); - static bool blk_root_drained_poll(BdrvChild *child); --static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); -+static void blk_root_drained_end(BdrvChild *child); - - static void blk_root_change_media(BdrvChild *child, bool load); - static void blk_root_resize(BdrvChild *child); -@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child) - return busy || !!blk->in_flight; - } - --static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) -+static void 
blk_root_drained_end(BdrvChild *child) - { - BlockBackend *blk = child->opaque; - assert(blk->quiesce_counter); -diff --git a/block/io.c b/block/io.c -index c2ed4b2af9..f4ca62b034 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, - } - } - --static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, -- int *drained_end_counter) -+void bdrv_parent_drained_end_single(BdrvChild *c) - { -+ IO_OR_GS_CODE(); -+ - assert(c->parent_quiesce_counter > 0); - c->parent_quiesce_counter--; - if (c->klass->drained_end) { -- c->klass->drained_end(c, drained_end_counter); -+ c->klass->drained_end(c); - } - } - --void bdrv_parent_drained_end_single(BdrvChild *c) --{ -- int drained_end_counter = 0; -- AioContext *ctx = bdrv_child_get_parent_aio_context(c); -- IO_OR_GS_CODE(); -- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); -- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0); --} -- - static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents, -- int *drained_end_counter) -+ bool ignore_bds_parents) - { - BdrvChild *c; - -@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, - if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { - continue; - } -- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); -+ bdrv_parent_drained_end_single(c); - } - } - -@@ -249,12 +240,10 @@ typedef struct { - bool poll; - BdrvChild *parent; - bool ignore_bds_parents; -- int *drained_end_counter; - } BdrvCoDrainData; - - /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ --static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, -- int *drained_end_counter) -+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) - { - if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || - (!begin && !bs->drv->bdrv_drain_end)) { -@@ -305,8 +294,7 @@ 
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - bool poll); - static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- int *drained_end_counter); -+ BdrvChild *parent, bool ignore_bds_parents); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- assert(!data->drained_end_counter); - bdrv_do_drained_begin(bs, data->recursive, data->parent, - data->ignore_bds_parents, data->poll); - } else { - assert(!data->poll); - bdrv_do_drained_end(bs, data->recursive, data->parent, -- data->ignore_bds_parents, -- data->drained_end_counter); -+ data->ignore_bds_parents); - } - aio_context_release(ctx); - } else { -@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin, bool recursive, - BdrvChild *parent, - bool ignore_bds_parents, -- bool poll, -- int *drained_end_counter) -+ bool poll) - { - BdrvCoDrainData data; - Coroutine *self = qemu_coroutine_self(); -@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .parent = parent, - .ignore_bds_parents = ignore_bds_parents, - .poll = poll, -- .drained_end_counter = drained_end_counter, - }; - - if (bs) { -@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- bdrv_drain_invoke(bs, true, NULL); -+ bdrv_drain_invoke(bs, true); - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, -- poll, NULL); -+ poll); - return; - } - -@@ -461,38 +445,24 @@ void 
bdrv_subtree_drained_begin(BlockDriverState *bs) - - /** - * This function does not poll, nor must any of its recursively called -- * functions. The *drained_end_counter pointee will be incremented -- * once for every background operation scheduled, and decremented once -- * the operation settles. Therefore, the pointer must remain valid -- * until the pointee reaches 0. That implies that whoever sets up the -- * pointee has to poll until it is 0. -- * -- * We use atomic operations to access *drained_end_counter, because -- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of -- * @bs may contain nodes in different AioContexts, -- * (2) bdrv_drain_all_end() uses the same counter for all nodes, -- * regardless of which AioContext they are in. -+ * functions. - */ - static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- int *drained_end_counter) -+ BdrvChild *parent, bool ignore_bds_parents) - { - BdrvChild *child; - int old_quiesce_counter; - -- assert(drained_end_counter != NULL); -- - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, -- false, drained_end_counter); -+ false); - return; - } - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- bdrv_drain_invoke(bs, false, drained_end_counter); -- bdrv_parent_drained_end(bs, parent, ignore_bds_parents, -- drained_end_counter); -+ bdrv_drain_invoke(bs, false); -+ bdrv_parent_drained_end(bs, parent, ignore_bds_parents); - - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); - if (old_quiesce_counter == 1) { -@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - assert(!ignore_bds_parents); - bs->recursive_quiesce_counter--; - QLIST_FOREACH(child, &bs->children, next) { -- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, -- drained_end_counter); -+ bdrv_do_drained_end(child->bs, true, 
child, ignore_bds_parents); - } - } - } - - void bdrv_drained_end(BlockDriverState *bs) - { -- int drained_end_counter = 0; - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); --} -- --void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) --{ -- IO_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, false); - } - - void bdrv_subtree_drained_end(BlockDriverState *bs) - { -- int drained_end_counter = 0; - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); -+ bdrv_do_drained_end(bs, true, NULL, false); - } - - void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) -@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) - - void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) - { -- int drained_end_counter = 0; - int i; - IO_OR_GS_CODE(); - - for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_end(child->bs, true, child, false, -- &drained_end_counter); -+ bdrv_do_drained_end(child->bs, true, child, false); - } -- -- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); -+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); - return; - } - -@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void) - - void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - { -- int drained_end_counter = 0; - GLOBAL_STATE_CODE(); - - g_assert(bs->quiesce_counter > 0); - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- 
bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, true); - } -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); - } - - void bdrv_drain_all_end(void) - { - BlockDriverState *bs = NULL; -- int drained_end_counter = 0; - GLOBAL_STATE_CODE(); - - /* -@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, true); - aio_context_release(aio_context); - } - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); -- - assert(bdrv_drain_all_count > 0); - bdrv_drain_all_count--; - } -diff --git a/blockjob.c b/blockjob.c -index f51d4e18f3..0ab721e139 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c) - } - } - --static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) -+static void child_job_drained_end(BdrvChild *c) - { - BlockJob *job = c->opaque; - job_resume(&job->job); -diff --git a/include/block/block-io.h b/include/block/block-io.h -index b099d7db45..054e964c9b 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, - int64_t bytes, BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - --/** -- * bdrv_drained_end_no_poll: -- * -- * Same as bdrv_drained_end(), but do not poll for the subgraph to -- * actually become unquiesced. Therefore, no graph changes will occur -- * with this function. -- * -- * *drained_end_counter is incremented for every background operation -- * that is scheduled, and will be decremented for every operation once -- * it settles. The caller must poll until it reaches 0. 
The counter -- * should be accessed using atomic operations only. -- */ --void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); -- -- - /* - * "I/O or GS" API functions. These functions can run without - * the BQL, but only in one specific iothread/main loop. -@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - * bdrv_parent_drained_end_single: - * - * End a quiesced section for the parent of @c. -- * -- * This polls @bs's AioContext until all scheduled sub-drained_ends -- * have settled, which may result in graph changes. - */ - void bdrv_parent_drained_end_single(BdrvChild *c); - -@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); - * bdrv_drained_end: - * - * End a quiescent section started by bdrv_drained_begin(). -- * -- * This polls @bs's AioContext until all scheduled sub-drained_ends -- * have settled. On one hand, that may result in graph changes. On -- * the other, this requires that the caller either runs in the main -- * loop; or that all involved nodes (@bs and all of its parents) are -- * in the caller's AioContext. - */ - void bdrv_drained_end(BlockDriverState *bs); - -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 40d646d1ed..2b97576f6d 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -939,15 +939,11 @@ struct BdrvChildClass { - * These functions must not change the graph (and therefore also must not - * call aio_poll(), which could change the graph indirectly). - * -- * If drained_end() schedules background operations, it must atomically -- * increment *drained_end_counter for each such operation and atomically -- * decrement it once the operation has settled. -- * - * Note that this can be nested. If drained_begin() was called twice, new - * I/O is allowed only after drained_end() was called twice, too. 
- */ - void (*drained_begin)(BdrvChild *child); -- void (*drained_end)(BdrvChild *child, int *drained_end_counter); -+ void (*drained_end)(BdrvChild *child); - - /* - * Returns whether the parent has pending requests for the child. This --- -2.31.1 - diff --git a/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch b/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch deleted file mode 100644 index aa64bec..0000000 --- a/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch +++ /dev/null @@ -1,274 +0,0 @@ -From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:07 +0100 -Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from - drain_begin/end. - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s) - -ignore_bds_parents is now ignored during drain_begin and drain_end, so -we can just remove it there. It is still a valid optimisation for -drain_all in bdrv_drained_poll(), so leave it around there. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-13-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270) -Signed-off-by: Stefano Garzarella ---- - block.c | 2 +- - block/io.c | 58 +++++++++++++++------------------------- - include/block/block-io.h | 3 +-- - 3 files changed, 24 insertions(+), 39 deletions(-) - -diff --git a/block.c b/block.c -index 5a583e260d..af31a94863 100644 ---- a/block.c -+++ b/block.c -@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) - static void bdrv_child_cb_drained_begin(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- bdrv_do_drained_begin_quiesce(bs, NULL, false); -+ bdrv_do_drained_begin_quiesce(bs, NULL); - } - - static bool bdrv_child_cb_drained_poll(BdrvChild *child) -diff --git a/block/io.c b/block/io.c -index 87d6f22ec4..2e9503df6a 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); - static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int64_t bytes, BdrvRequestFlags flags); - --static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents) -+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) - { - BdrvChild *c, *next; - - QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { -- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { -+ if (c == ignore) { - continue; - } - bdrv_parent_drained_begin_single(c, false); -@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c) - } - } - --static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents) -+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) - { - BdrvChild *c; - - QLIST_FOREACH(c, &bs->parents, next_parent) { -- if (c 
== ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { -+ if (c == ignore) { - continue; - } - bdrv_parent_drained_end_single(c); -@@ -242,7 +240,6 @@ typedef struct { - bool begin; - bool poll; - BdrvChild *parent; -- bool ignore_bds_parents; - } BdrvCoDrainData; - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents, bool poll); --static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents); -+ bool poll); -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, -- data->poll); -+ bdrv_do_drained_begin(bs, data->parent, data->poll); - } else { - assert(!data->poll); -- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); -+ bdrv_do_drained_end(bs, data->parent); - } - aio_context_release(ctx); - } else { -@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) - static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin, - BdrvChild *parent, -- bool ignore_bds_parents, - bool poll) - { - BdrvCoDrainData data; -@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .done = false, - .begin = begin, - .parent = parent, -- .ignore_bds_parents = ignore_bds_parents, - .poll = poll, - }; - -@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - } - } - --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, -- BdrvChild *parent, bool ignore_bds_parents) -+void bdrv_do_drained_begin_quiesce(BlockDriverState 
*bs, BdrvChild *parent) - { - IO_OR_GS_CODE(); - assert(!qemu_in_coroutine()); -@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { - aio_disable_external(bdrv_get_aio_context(bs)); -- -- /* TODO Remove ignore_bds_parents, we don't consider it any more */ -- bdrv_parent_drained_begin(bs, parent, false); -+ bdrv_parent_drained_begin(bs, parent); - if (bs->drv && bs->drv->bdrv_drain_begin) { - bs->drv->bdrv_drain_begin(bs); - } -@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents, bool poll) -+ bool poll) - { - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); -+ bdrv_co_yield_to_drain(bs, true, parent, poll); - return; - } - -- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); -+ bdrv_do_drained_begin_quiesce(bs, parent); - - /* - * Wait for drained requests to finish. -@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - * nodes. - */ - if (poll) { -- assert(!ignore_bds_parents); - BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); - } - } -@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, NULL, false, true); -+ bdrv_do_drained_begin(bs, NULL, true); - } - - /** - * This function does not poll, nor must any of its recursively called - * functions. 
- */ --static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents) -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) - { - int old_quiesce_counter; - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); -+ bdrv_co_yield_to_drain(bs, false, parent, false); - return; - } - assert(bs->quiesce_counter > 0); -@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - if (bs->drv && bs->drv->bdrv_drain_end) { - bs->drv->bdrv_drain_end(bs); - } -- /* TODO Remove ignore_bds_parents, we don't consider it any more */ -- bdrv_parent_drained_end(bs, parent, false); -- -+ bdrv_parent_drained_end(bs, parent); - aio_enable_external(bdrv_get_aio_context(bs)); - } - } -@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - void bdrv_drained_end(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, NULL, false); -+ bdrv_do_drained_end(bs, NULL); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, NULL, true, true); -+ bdrv_co_yield_to_drain(NULL, true, NULL, true); - return; - } - -@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_begin(bs, NULL, true, false); -+ bdrv_do_drained_begin(bs, NULL, false); - aio_context_release(aio_context); - } - -@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- bdrv_do_drained_end(bs, NULL, true); -+ bdrv_do_drained_end(bs, NULL); - } - } - -@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, NULL, 
true); -+ bdrv_do_drained_end(bs, NULL); - aio_context_release(aio_context); - } - -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 9c36a16a1f..8f5e75756a 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); - * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already - * running requests to complete. - */ --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, -- BdrvChild *parent, bool ignore_bds_parents); -+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); - - /** - * bdrv_drained_end: --- -2.31.1 - diff --git a/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch b/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch deleted file mode 100644 index 94eba86..0000000 --- a/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:10 +0100 -Subject: [PATCH 28/31] block: Remove poll parameter from - bdrv_parent_drained_begin_single() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s) - -All callers of bdrv_parent_drained_begin_single() pass poll=false now, -so we don't need the parameter any more. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-16-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 ++-- - block/io.c | 8 ++------ - include/block/block-io.h | 5 ++--- - 3 files changed, 6 insertions(+), 11 deletions(-) - -diff --git a/block.c b/block.c -index 65588d313a..0d78711416 100644 ---- a/block.c -+++ b/block.c -@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque) - * new_bs drained when calling bdrv_replace_child_tran() is not a - * requirement any more. - */ -- bdrv_parent_drained_begin_single(s->child, false); -+ bdrv_parent_drained_begin_single(s->child); - assert(!bdrv_parent_drained_poll_single(s->child)); - } - assert(s->child->quiesced_parent); -@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, - * a problem, we already did this), but it will still poll until the parent - * is fully quiesced, so it will not be negatively affected either. 
- */ -- bdrv_parent_drained_begin_single(new_child, false); -+ bdrv_parent_drained_begin_single(new_child); - bdrv_replace_child_noperm(new_child, child_bs); - - BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); -diff --git a/block/io.c b/block/io.c -index ae64830eac..38e57d1f67 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) - if (c == ignore) { - continue; - } -- bdrv_parent_drained_begin_single(c, false); -+ bdrv_parent_drained_begin_single(c); - } - } - -@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, - return busy; - } - --void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) -+void bdrv_parent_drained_begin_single(BdrvChild *c) - { -- AioContext *ctx = bdrv_child_get_parent_aio_context(c); - IO_OR_GS_CODE(); - - assert(!c->quiesced_parent); -@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) - if (c->klass->drained_begin) { - c->klass->drained_begin(c); - } -- if (poll) { -- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); -- } - } - - static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 65e6d2569b..92aaa7c1e9 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); - /** - * bdrv_parent_drained_begin_single: - * -- * Begin a quiesced section for the parent of @c. If @poll is true, wait for -- * any pending activity to cease. -+ * Begin a quiesced section for the parent of @c. 
- */ --void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); -+void bdrv_parent_drained_begin_single(BdrvChild *c); - - /** - * bdrv_parent_drained_poll_single: --- -2.31.1 - diff --git a/SOURCES/kvm-block-Remove-subtree-drains.patch b/SOURCES/kvm-block-Remove-subtree-drains.patch deleted file mode 100644 index af9c0ff..0000000 --- a/SOURCES/kvm-block-Remove-subtree-drains.patch +++ /dev/null @@ -1,896 +0,0 @@ -From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:05 +0100 -Subject: [PATCH 23/31] block: Remove subtree drains - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s) - -Subtree drains are not used any more. Remove them. - -After this, BdrvChildClass.attach/detach() don't poll any more. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-11-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066) -Signed-off-by: Stefano Garzarella ---- - block.c | 20 +-- - block/io.c | 121 +++----------- - include/block/block-io.h | 18 +-- - include/block/block_int-common.h | 1 - - include/block/block_int-io.h | 12 -- - tests/unit/test-bdrv-drain.c | 261 ++----------------------------- - 6 files changed, 44 insertions(+), 389 deletions(-) - -diff --git a/block.c b/block.c -index 5330e89903..e0e3b21790 100644 ---- a/block.c -+++ b/block.c -@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) - static bool bdrv_child_cb_drained_poll(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- return bdrv_drain_poll(bs, false, NULL, false); -+ return bdrv_drain_poll(bs, NULL, false); - } - - static void bdrv_child_cb_drained_end(BdrvChild *child) -@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) - assert(!bs->file); - bs->file = child; - } -- -- bdrv_apply_subtree_drain(child, bs); - } - - static void bdrv_child_cb_detach(BdrvChild *child) -@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child) - bdrv_backing_detach(child); - } - -- bdrv_unapply_subtree_drain(child, bs); -- - assert_bdrv_graph_writable(bs); - QLIST_REMOVE(child, next); - if (child == bs->backing) { -@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - } - - if (old_bs) { -- /* Detach first so that the recursive drain sections coming from @child -- * are already gone and we only end the drain sections that came from -- * elsewhere. 
*/ - if (child->klass->detach) { - child->klass->detach(child); - } -@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); - - /* -- * Detaching the old node may have led to the new node's -- * quiesce_counter having been decreased. Not a problem, we -- * just need to recognize this here and then invoke -- * drained_end appropriately more often. -+ * Polling in bdrv_parent_drained_begin_single() may have led to the new -+ * node's quiesce_counter having been decreased. Not a problem, we just -+ * need to recognize this here and then invoke drained_end appropriately -+ * more often. - */ - assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); - drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; - -- /* Attach only after starting new drained sections, so that recursive -- * drain sections coming from @child don't get an extra .drained_begin -- * callback. */ - if (child->klass->attach) { - child->klass->attach(child); - } -diff --git a/block/io.c b/block/io.c -index a25103be6f..75224480d0 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -236,17 +236,15 @@ typedef struct { - BlockDriverState *bs; - bool done; - bool begin; -- bool recursive; - bool poll; - BdrvChild *parent; - bool ignore_bds_parents; - } BdrvCoDrainData; - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ --bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, -- BdrvChild *ignore_parent, bool ignore_bds_parents) -+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, -+ bool ignore_bds_parents) - { -- BdrvChild *child, *next; - IO_OR_GS_CODE(); - - if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { -@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - return true; - } - -- if (recursive) { -- assert(!ignore_bds_parents); -- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { -- if 
(bdrv_drain_poll(child->bs, recursive, child, false)) { -- return true; -- } -- } -- } -- - return false; - } - --static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, -+static bool bdrv_drain_poll_top_level(BlockDriverState *bs, - BdrvChild *ignore_parent) - { -- return bdrv_drain_poll(bs, recursive, ignore_parent, false); -+ return bdrv_drain_poll(bs, ignore_parent, false); - } - --static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- bool poll); --static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents); -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents, bool poll); -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- bdrv_do_drained_begin(bs, data->recursive, data->parent, -- data->ignore_bds_parents, data->poll); -+ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, -+ data->poll); - } else { - assert(!data->poll); -- bdrv_do_drained_end(bs, data->recursive, data->parent, -- data->ignore_bds_parents); -+ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); - } - aio_context_release(ctx); - } else { -@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) - } - - static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, -- bool begin, bool recursive, -+ bool begin, - BdrvChild *parent, - bool ignore_bds_parents, - bool poll) -@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .bs = bs, - .done = false, - .begin = begin, -- .recursive = recursive, - .parent = parent, - .ignore_bds_parents = ignore_bds_parents, - .poll = poll, 
-@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - } - --static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- bool poll) -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents, bool poll) - { -- BdrvChild *child, *next; -- - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, -- poll); -+ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); - return; - } - - bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); - -- if (recursive) { -- assert(!ignore_bds_parents); -- bs->recursive_quiesce_counter++; -- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { -- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, -- false); -- } -- } -- - /* - * Wait for drained requests to finish. - * -@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - */ - if (poll) { - assert(!ignore_bds_parents); -- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); -+ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); - } - } - - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, false, NULL, false, true); --} -- --void bdrv_subtree_drained_begin(BlockDriverState *bs) --{ -- IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, true, NULL, false, true); -+ bdrv_do_drained_begin(bs, NULL, false, true); - } - - /** - * This function does not poll, nor must any of its recursively called - * functions. 
- */ --static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents) -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents) - { -- BdrvChild *child; - int old_quiesce_counter; - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, -- false); -+ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); - return; - } - assert(bs->quiesce_counter > 0); -@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - if (old_quiesce_counter == 1) { - aio_enable_external(bdrv_get_aio_context(bs)); - } -- -- if (recursive) { -- assert(!ignore_bds_parents); -- bs->recursive_quiesce_counter--; -- QLIST_FOREACH(child, &bs->children, next) { -- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); -- } -- } - } - - void bdrv_drained_end(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false); --} -- --void bdrv_subtree_drained_end(BlockDriverState *bs) --{ -- IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, true, NULL, false); --} -- --void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) --{ -- int i; -- IO_OR_GS_CODE(); -- -- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_begin(child->bs, true, child, false, true); -- } --} -- --void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) --{ -- int i; -- IO_OR_GS_CODE(); -- -- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_end(child->bs, true, child, false); -- } -+ bdrv_do_drained_end(bs, NULL, false); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void) - while ((bs = bdrv_next_all_states(bs))) { - AioContext *aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); -- result |= bdrv_drain_poll(bs, false, 
NULL, true); -+ result |= bdrv_drain_poll(bs, NULL, true); - aio_context_release(aio_context); - } - -@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); -+ bdrv_co_yield_to_drain(NULL, true, NULL, true, true); - return; - } - -@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_begin(bs, false, NULL, true, false); -+ bdrv_do_drained_begin(bs, NULL, true, false); - aio_context_release(aio_context); - } - -@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- bdrv_do_drained_end(bs, false, NULL, true); -+ bdrv_do_drained_end(bs, NULL, true); - } - } - -@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, false, NULL, true); -+ bdrv_do_drained_end(bs, NULL, true); - aio_context_release(aio_context); - } - -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 054e964c9b..9c36a16a1f 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); - /** - * bdrv_drain_poll: - * -- * Poll for pending requests in @bs, its parents (except for @ignore_parent), -- * and if @recursive is true its children as well (used for subtree drain). -+ * Poll for pending requests in @bs and its parents (except for @ignore_parent). - * - * If @ignore_bds_parents is true, parents that are BlockDriverStates must - * ignore the drain request because they will be drained separately (used for -@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); - * - * This is part of bdrv_drained_begin. 
- */ --bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, -- BdrvChild *ignore_parent, bool ignore_bds_parents); -+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, -+ bool ignore_bds_parents); - - /** - * bdrv_drained_begin: -@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); - void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents); - --/** -- * Like bdrv_drained_begin, but recursively begins a quiesced section for -- * exclusive access to all child nodes as well. -- */ --void bdrv_subtree_drained_begin(BlockDriverState *bs); -- - /** - * bdrv_drained_end: - * -@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); - */ - void bdrv_drained_end(BlockDriverState *bs); - --/** -- * End a quiescent section started by bdrv_subtree_drained_begin(). -- */ --void bdrv_subtree_drained_end(BlockDriverState *bs); -- - #endif /* BLOCK_IO_H */ -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 2b97576f6d..791dddfd7d 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -1184,7 +1184,6 @@ struct BlockDriverState { - - /* Accessed with atomic ops. */ - int quiesce_counter; -- int recursive_quiesce_counter; - - unsigned int write_gen; /* Current data generation */ - -diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h -index 4b0b3e17ef..8bc061ebb8 100644 ---- a/include/block/block_int-io.h -+++ b/include/block/block_int-io.h -@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, - */ - void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); - -- --/* -- * "I/O or GS" API functions. These functions can run without -- * the BQL, but only in one specific iothread/main loop. -- * -- * See include/block/block-io.h for more information about -- * the "I/O or GS" API. 
-- */ -- --void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); --void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); -- - #endif /* BLOCK_INT_IO_H */ -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 695519ee02..dda08de8db 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void)) - enum drain_type { - BDRV_DRAIN_ALL, - BDRV_DRAIN, -- BDRV_SUBTREE_DRAIN, - DRAIN_TYPE_MAX, - }; - -@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) - switch (drain_type) { - case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; - case BDRV_DRAIN: bdrv_drained_begin(bs); break; -- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; - default: g_assert_not_reached(); - } - } -@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) - switch (drain_type) { - case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; - case BDRV_DRAIN: bdrv_drained_end(bs); break; -- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; - default: g_assert_not_reached(); - } - } -@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void) - test_drv_cb_common(BDRV_DRAIN, false); - } - --static void test_drv_cb_drain_subtree(void) --{ -- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); --} -- - static void test_drv_cb_co_drain_all(void) - { - call_in_coroutine(test_drv_cb_drain_all); -@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void) - call_in_coroutine(test_drv_cb_drain); - } - --static void test_drv_cb_co_drain_subtree(void) --{ -- call_in_coroutine(test_drv_cb_drain_subtree); --} -- - static void test_quiesce_common(enum drain_type drain_type, bool recursive) - { - BlockBackend *blk; -@@ -332,11 +319,6 @@ static void test_quiesce_drain(void) - test_quiesce_common(BDRV_DRAIN, false); - } - --static void 
test_quiesce_drain_subtree(void) --{ -- test_quiesce_common(BDRV_SUBTREE_DRAIN, true); --} -- - static void test_quiesce_co_drain_all(void) - { - call_in_coroutine(test_quiesce_drain_all); -@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void) - call_in_coroutine(test_quiesce_drain); - } - --static void test_quiesce_co_drain_subtree(void) --{ -- call_in_coroutine(test_quiesce_drain_subtree); --} -- - static void test_nested(void) - { - BlockBackend *blk; -@@ -402,158 +379,6 @@ static void test_nested(void) - blk_unref(blk); - } - --static void test_multiparent(void) --{ -- BlockBackend *blk_a, *blk_b; -- BlockDriverState *bs_a, *bs_b, *backing; -- BDRVTestState *a_s, *b_s, *backing_s; -- -- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, -- &error_abort); -- a_s = bs_a->opaque; -- blk_insert_bs(blk_a, bs_a, &error_abort); -- -- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, -- &error_abort); -- b_s = bs_b->opaque; -- blk_insert_bs(blk_b, bs_b, &error_abort); -- -- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); -- backing_s = backing->opaque; -- bdrv_set_backing_hd(bs_a, backing, &error_abort); -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); -- g_assert_cmpint(backing->quiesce_counter, ==, 1); -- g_assert_cmpint(a_s->drain_count, ==, 1); -- g_assert_cmpint(b_s->drain_count, ==, 1); -- 
g_assert_cmpint(backing_s->drain_count, ==, 1); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 2); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); -- g_assert_cmpint(backing->quiesce_counter, ==, 2); -- g_assert_cmpint(a_s->drain_count, ==, 2); -- g_assert_cmpint(b_s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, 2); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); -- g_assert_cmpint(backing->quiesce_counter, ==, 1); -- g_assert_cmpint(a_s->drain_count, ==, 1); -- g_assert_cmpint(b_s->drain_count, ==, 1); -- g_assert_cmpint(backing_s->drain_count, ==, 1); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- bdrv_unref(backing); -- bdrv_unref(bs_a); -- bdrv_unref(bs_b); -- blk_unref(blk_a); -- blk_unref(blk_b); --} -- --static void test_graph_change_drain_subtree(void) --{ -- BlockBackend *blk_a, *blk_b; -- BlockDriverState *bs_a, *bs_b, *backing; -- BDRVTestState *a_s, *b_s, *backing_s; -- -- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, -- &error_abort); -- a_s = bs_a->opaque; -- blk_insert_bs(blk_a, bs_a, &error_abort); -- -- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, -- &error_abort); -- b_s = bs_b->opaque; -- blk_insert_bs(blk_b, bs_b, &error_abort); -- -- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); -- backing_s = backing->opaque; -- bdrv_set_backing_hd(bs_a, backing, &error_abort); -- 
-- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); -- g_assert_cmpint(backing->quiesce_counter, ==, 5); -- g_assert_cmpint(a_s->drain_count, ==, 5); -- g_assert_cmpint(b_s->drain_count, ==, 5); -- g_assert_cmpint(backing_s->drain_count, ==, 5); -- -- bdrv_set_backing_hd(bs_b, NULL, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 3); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); -- g_assert_cmpint(backing->quiesce_counter, ==, 3); -- g_assert_cmpint(a_s->drain_count, ==, 3); -- g_assert_cmpint(b_s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, 3); -- -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); -- g_assert_cmpint(backing->quiesce_counter, ==, 5); -- g_assert_cmpint(a_s->drain_count, ==, 5); -- g_assert_cmpint(b_s->drain_count, ==, 5); -- g_assert_cmpint(backing_s->drain_count, ==, 5); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- 
g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- bdrv_unref(backing); -- bdrv_unref(bs_a); -- bdrv_unref(bs_b); -- blk_unref(blk_a); -- blk_unref(blk_b); --} -- - static void test_graph_change_drain_all(void) - { - BlockBackend *blk_a, *blk_b; -@@ -773,12 +598,6 @@ static void test_iothread_drain(void) - test_iothread_common(BDRV_DRAIN, 1); - } - --static void test_iothread_drain_subtree(void) --{ -- test_iothread_common(BDRV_SUBTREE_DRAIN, 0); -- test_iothread_common(BDRV_SUBTREE_DRAIN, 1); --} -- - - typedef struct TestBlockJob { - BlockJob common; -@@ -863,7 +682,6 @@ enum test_job_result { - enum test_job_drain_node { - TEST_JOB_DRAIN_SRC, - TEST_JOB_DRAIN_SRC_CHILD, -- TEST_JOB_DRAIN_SRC_PARENT, - }; - - static void test_blockjob_common_drain_node(enum drain_type drain_type, -@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - case TEST_JOB_DRAIN_SRC_CHILD: - drain_bs = src_backing; - break; -- case TEST_JOB_DRAIN_SRC_PARENT: -- drain_bs = src_overlay; -- break; - default: - g_assert_not_reached(); - } -@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - TEST_JOB_DRAIN_SRC); - test_blockjob_common_drain_node(drain_type, use_iothread, result, - TEST_JOB_DRAIN_SRC_CHILD); -- if (drain_type == BDRV_SUBTREE_DRAIN) { -- test_blockjob_common_drain_node(drain_type, use_iothread, result, -- TEST_JOB_DRAIN_SRC_PARENT); -- } - } - - static void test_blockjob_drain_all(void) -@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void) - test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); - } - --static void test_blockjob_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); --} -- - static void test_blockjob_error_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); -@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void) - 
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); - } - --static void test_blockjob_error_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); --} -- - static void test_blockjob_iothread_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); -@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void) - test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); - } - --static void test_blockjob_iothread_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); --} -- - static void test_blockjob_iothread_error_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); -@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void) - test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); - } - --static void test_blockjob_iothread_error_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); --} -- - - typedef struct BDRVTestTopState { - BdrvChild *wait_child; -@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - bdrv_drain(child_bs); - bdrv_unref(child_bs); - break; -- case BDRV_SUBTREE_DRAIN: -- /* Would have to ref/unref bs here for !detach_instead_of_delete, but -- * then the whole test becomes pointless because the graph changes -- * don't occur during the drain any more. 
*/ -- assert(detach_instead_of_delete); -- bdrv_subtree_drained_begin(bs); -- bdrv_subtree_drained_end(bs); -- break; - case BDRV_DRAIN_ALL: - bdrv_drain_all_begin(); - bdrv_drain_all_end(); -@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void) - do_test_delete_by_drain(true, BDRV_DRAIN); - } - --static void test_detach_by_drain_subtree(void) --{ -- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); --} -- - - struct detach_by_parent_data { - BlockDriverState *parent_b; -@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb) - g_assert(acb != NULL); - - /* Drain and check the expected result */ -- bdrv_subtree_drained_begin(parent_b); -+ bdrv_drained_begin(parent_b); -+ bdrv_drained_begin(a); -+ bdrv_drained_begin(b); -+ bdrv_drained_begin(c); - - g_assert(detach_by_parent_data.child_c != NULL); - -@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb) - g_assert(QLIST_NEXT(child_a, next) == NULL); - - g_assert_cmpint(parent_a->quiesce_counter, ==, 1); -- g_assert_cmpint(parent_b->quiesce_counter, ==, 1); -+ g_assert_cmpint(parent_b->quiesce_counter, ==, 3); - g_assert_cmpint(a->quiesce_counter, ==, 1); -- g_assert_cmpint(b->quiesce_counter, ==, 0); -+ g_assert_cmpint(b->quiesce_counter, ==, 1); - g_assert_cmpint(c->quiesce_counter, ==, 1); - -- bdrv_subtree_drained_end(parent_b); -+ bdrv_drained_end(parent_b); -+ bdrv_drained_end(a); -+ bdrv_drained_end(b); -+ bdrv_drained_end(c); - - bdrv_unref(parent_b); - blk_unref(blk); -@@ -2202,70 +1984,47 @@ int main(int argc, char **argv) - - g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); - g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); -- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", -- test_drv_cb_drain_subtree); - - g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", - test_drv_cb_co_drain_all); - g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); -- 
g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", -- test_drv_cb_co_drain_subtree); -- - - g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); - g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); -- g_test_add_func("/bdrv-drain/quiesce/drain_subtree", -- test_quiesce_drain_subtree); - - g_test_add_func("/bdrv-drain/quiesce/co/drain_all", - test_quiesce_co_drain_all); - g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); -- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", -- test_quiesce_co_drain_subtree); - - g_test_add_func("/bdrv-drain/nested", test_nested); -- g_test_add_func("/bdrv-drain/multiparent", test_multiparent); - -- g_test_add_func("/bdrv-drain/graph-change/drain_subtree", -- test_graph_change_drain_subtree); - g_test_add_func("/bdrv-drain/graph-change/drain_all", - test_graph_change_drain_all); - - g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); - g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); -- g_test_add_func("/bdrv-drain/iothread/drain_subtree", -- test_iothread_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); - g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); -- g_test_add_func("/bdrv-drain/blockjob/drain_subtree", -- test_blockjob_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/error/drain_all", - test_blockjob_error_drain_all); - g_test_add_func("/bdrv-drain/blockjob/error/drain", - test_blockjob_error_drain); -- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", -- test_blockjob_error_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", - test_blockjob_iothread_drain_all); - g_test_add_func("/bdrv-drain/blockjob/iothread/drain", - test_blockjob_iothread_drain); -- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", -- test_blockjob_iothread_drain_subtree); - - 
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", - test_blockjob_iothread_error_drain_all); - g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", - test_blockjob_iothread_error_drain); -- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", -- test_blockjob_iothread_error_drain_subtree); - - g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); - g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); - g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); -- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); - g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); - g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); - --- -2.31.1 - diff --git a/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch b/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch deleted file mode 100644 index 1529fdb..0000000 --- a/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch +++ /dev/null @@ -1,302 +0,0 @@ -From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:58 +0100 -Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to - non-coroutine_fn - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s) - -Polling during bdrv_drained_end() can be problematic (and in the future, -we may get cases for bdrv_drained_begin() where polling is forbidden, -and we don't care about already in-flight requests, but just want to -prevent new requests from arriving). 
- -The .bdrv_drained_begin/end callbacks running in a coroutine is the only -reason why we have to do this polling, so make them non-coroutine -callbacks again. None of the callers actually yield any more. - -This means that bdrv_drained_end() effectively doesn't poll any more, -even if AIO_WAIT_WHILE() loops are still there (their condition is false -from the beginning). This is generally not a problem, but in -test-bdrv-drain, some additional explicit aio_poll() calls need to be -added because the test case wants to verify the final state after BHs -have executed. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 +-- - block/io.c | 49 +++++--------------------------- - block/qed.c | 6 ++-- - block/throttle.c | 8 +++--- - include/block/block_int-common.h | 10 ++++--- - tests/unit/test-bdrv-drain.c | 18 ++++++------ - 6 files changed, 32 insertions(+), 63 deletions(-) - -diff --git a/block.c b/block.c -index ec184150a2..16a62a329c 100644 ---- a/block.c -+++ b/block.c -@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, - assert(is_power_of_2(bs->bl.request_alignment)); - - for (i = 0; i < bs->quiesce_counter; i++) { -- if (drv->bdrv_co_drain_begin) { -- drv->bdrv_co_drain_begin(bs); -+ if (drv->bdrv_drain_begin) { -+ drv->bdrv_drain_begin(bs); - } - } - -diff --git a/block/io.c b/block/io.c -index b9424024f9..c2ed4b2af9 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -252,55 +252,20 @@ typedef struct { - int *drained_end_counter; - } BdrvCoDrainData; - --static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) --{ -- BdrvCoDrainData *data = opaque; -- BlockDriverState *bs = data->bs; -- -- if (data->begin) { -- 
bs->drv->bdrv_co_drain_begin(bs); -- } else { -- bs->drv->bdrv_co_drain_end(bs); -- } -- -- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ -- qatomic_mb_set(&data->done, true); -- if (!data->begin) { -- qatomic_dec(data->drained_end_counter); -- } -- bdrv_dec_in_flight(bs); -- -- g_free(data); --} -- --/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ -+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ - static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, - int *drained_end_counter) - { -- BdrvCoDrainData *data; -- -- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || -- (!begin && !bs->drv->bdrv_co_drain_end)) { -+ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || -+ (!begin && !bs->drv->bdrv_drain_end)) { - return; - } - -- data = g_new(BdrvCoDrainData, 1); -- *data = (BdrvCoDrainData) { -- .bs = bs, -- .done = false, -- .begin = begin, -- .drained_end_counter = drained_end_counter, -- }; -- -- if (!begin) { -- qatomic_inc(drained_end_counter); -+ if (begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } else { -+ bs->drv->bdrv_drain_end(bs); - } -- -- /* Make sure the driver callback completes during the polling phase for -- * drain_begin. */ -- bdrv_inc_in_flight(bs); -- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); -- aio_co_schedule(bdrv_get_aio_context(bs), data->co); - } - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -diff --git a/block/qed.c b/block/qed.c -index 013f826c44..c2691a85b1 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) - assert(!s->allocating_write_reqs_plugged); - if (s->allocating_acb != NULL) { - /* Another allocating write came concurrently. This cannot happen -- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. -+ * from bdrv_qed_drain_begin, but it can happen when the timer runs. 
- */ - qemu_co_mutex_unlock(&s->table_lock); - return false; -@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, - } - } - --static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) -+static void bdrv_qed_drain_begin(BlockDriverState *bs) - { - BDRVQEDState *s = bs->opaque; - -@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = { - .bdrv_co_check = bdrv_qed_co_check, - .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, - .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, -- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, -+ .bdrv_drain_begin = bdrv_qed_drain_begin, - }; - - static void bdrv_qed_init(void) -diff --git a/block/throttle.c b/block/throttle.c -index 131eba3ab4..88851c84f4 100644 ---- a/block/throttle.c -+++ b/block/throttle.c -@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) - reopen_state->opaque = NULL; - } - --static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) -+static void throttle_drain_begin(BlockDriverState *bs) - { - ThrottleGroupMember *tgm = bs->opaque; - if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { -@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) - } - } - --static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) -+static void throttle_drain_end(BlockDriverState *bs) - { - ThrottleGroupMember *tgm = bs->opaque; - assert(tgm->io_limits_disabled); -@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = { - .bdrv_reopen_commit = throttle_reopen_commit, - .bdrv_reopen_abort = throttle_reopen_abort, - -- .bdrv_co_drain_begin = throttle_co_drain_begin, -- .bdrv_co_drain_end = throttle_co_drain_end, -+ .bdrv_drain_begin = throttle_drain_begin, -+ .bdrv_drain_end = throttle_drain_end, - - .is_filter = true, - .strong_runtime_opts = throttle_strong_runtime_opts, -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 31ae91e56e..40d646d1ed 
100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -735,17 +735,19 @@ struct BlockDriver { - void (*bdrv_io_unplug)(BlockDriverState *bs); - - /** -- * bdrv_co_drain_begin is called if implemented in the beginning of a -+ * bdrv_drain_begin is called if implemented in the beginning of a - * drain operation to drain and stop any internal sources of requests in - * the driver. -- * bdrv_co_drain_end is called if implemented at the end of the drain. -+ * bdrv_drain_end is called if implemented at the end of the drain. - * - * They should be used by the driver to e.g. manage scheduled I/O - * requests, or toggle an internal state. After the end of the drain new - * requests will continue normally. -+ * -+ * Implementations of both functions must not call aio_poll(). - */ -- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); -- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); -+ void (*bdrv_drain_begin)(BlockDriverState *bs); -+ void (*bdrv_drain_end)(BlockDriverState *bs); - - bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); - bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 24f34e24ad..695519ee02 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque) - bdrv_dec_in_flight(bs); - } - --static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) -+static void bdrv_test_drain_begin(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count++; -@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) - } - } - --static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) -+static void bdrv_test_drain_end(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count--; -@@ -111,8 +111,8 @@ static BlockDriver 
bdrv_test = { - .bdrv_close = bdrv_test_close, - .bdrv_co_preadv = bdrv_test_co_preadv, - -- .bdrv_co_drain_begin = bdrv_test_co_drain_begin, -- .bdrv_co_drain_end = bdrv_test_co_drain_end, -+ .bdrv_drain_begin = bdrv_test_drain_begin, -+ .bdrv_drain_end = bdrv_test_drain_end, - - .bdrv_child_perm = bdrv_default_perms, - -@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void) - bdrv_drained_begin(bs_child); - g_assert(!job_has_completed); - bdrv_drained_end(bs_child); -+ aio_poll(qemu_get_aio_context(), false); - g_assert(job_has_completed); - - bdrv_unref(bs_parents[0]); -@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void) - - g_assert(!job_has_completed); - ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); -+ aio_poll(qemu_get_aio_context(), false); - g_assert(ret == 0); - g_assert(job_has_completed); - -@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) - * .was_drained. - * Increment .drain_count. - */ --static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) -+static void bdrv_replace_test_drain_begin(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) - * If .drain_count reaches 0 and the node has a backing file, issue a - * read request. 
- */ --static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) -+static void bdrv_replace_test_drain_end(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = { - .bdrv_close = bdrv_replace_test_close, - .bdrv_co_preadv = bdrv_replace_test_co_preadv, - -- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, -- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, -+ .bdrv_drain_begin = bdrv_replace_test_drain_begin, -+ .bdrv_drain_end = bdrv_replace_test_drain_end, - - .bdrv_child_perm = bdrv_default_perms, - }; --- -2.31.1 - diff --git a/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch b/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch deleted file mode 100644 index 2d95689..0000000 --- a/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch +++ /dev/null @@ -1,246 +0,0 @@ -From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:56 +0200 -Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s) - -ImageInfo sometimes contains flat information, and sometimes it does -not. Split off a BlockNodeInfo struct, which only contains information -about a single node and has no link to the backing image. - -We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct, -which has links to all child nodes, not just the backing node. It would -be strange to base BlockGraphInfo on ImageInfo, because then this -extended struct would have two links to the backing node (one in -BlockGraphInfo as one of all the child links, and one in ImageInfo). 
- -Furthermore, it is quite common to ignore the backing-image field -altogether: bdrv_query_image_info() does not set it, and -bdrv_image_info_dump() does not evaluate it. That signals that we -should have different structs for describing a single node and one that -has a link to the backing image. - -Still, bdrv_query_image_info() and bdrv_image_info_dump() are not -changed too much in this patch. Follow-up patches will handle them. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-5-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------ - include/block/qapi.h | 3 ++ - qapi/block-core.json | 24 +++++++++---- - 3 files changed, 85 insertions(+), 28 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index 51202b470a..e5022b4481 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs, - } - - /** -- * bdrv_query_image_info: -- * @bs: block device to examine -- * @p_info: location to store image information -- * @errp: location to store error information -- * -- * Store "flat" image information in @p_info. -- * -- * "Flat" means it does *not* query backing image information, -- * i.e. (*pinfo)->has_backing_image will be set to false and -- * (*pinfo)->backing_image to NULL even when the image does in fact have -- * a backing image. -- * -- * @p_info will be set only on success. On error, store error in @errp. -+ * Helper function for other query info functions. Store information about @bs -+ * in @info, setting @errp on error. 
- */ --void bdrv_query_image_info(BlockDriverState *bs, -- ImageInfo **p_info, -- Error **errp) -+static void bdrv_do_query_node_info(BlockDriverState *bs, -+ BlockNodeInfo *info, -+ Error **errp) - { - int64_t size; - const char *backing_filename; - BlockDriverInfo bdi; - int ret; - Error *err = NULL; -- ImageInfo *info; - - aio_context_acquire(bdrv_get_aio_context(bs)); - -@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs, - - bdrv_refresh_filename(bs); - -- info = g_new0(ImageInfo, 1); - info->filename = g_strdup(bs->filename); - info->format = g_strdup(bdrv_get_format_name(bs)); - info->virtual_size = size; -@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs, - info->format_specific = bdrv_get_specific_info(bs, &err); - if (err) { - error_propagate(errp, err); -- qapi_free_ImageInfo(info); - goto out; - } - info->has_format_specific = info->format_specific != NULL; -@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs, - break; - default: - error_propagate(errp, err); -- qapi_free_ImageInfo(info); - goto out; - } - -- *p_info = info; -- - out: - aio_context_release(bdrv_get_aio_context(bs)); - } - -+/** -+ * bdrv_query_block_node_info: -+ * @bs: block node to examine -+ * @p_info: location to store node information -+ * @errp: location to store error information -+ * -+ * Store image information about @bs in @p_info. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. 
-+ */ -+void bdrv_query_block_node_info(BlockDriverState *bs, -+ BlockNodeInfo **p_info, -+ Error **errp) -+{ -+ BlockNodeInfo *info; -+ ERRP_GUARD(); -+ -+ info = g_new0(BlockNodeInfo, 1); -+ bdrv_do_query_node_info(bs, info, errp); -+ if (*errp) { -+ qapi_free_BlockNodeInfo(info); -+ return; -+ } -+ -+ *p_info = info; -+} -+ -+/** -+ * bdrv_query_image_info: -+ * @bs: block node to examine -+ * @p_info: location to store image information -+ * @errp: location to store error information -+ * -+ * Store "flat" image information in @p_info. -+ * -+ * "Flat" means it does *not* query backing image information, -+ * i.e. (*pinfo)->has_backing_image will be set to false and -+ * (*pinfo)->backing_image to NULL even when the image does in fact have -+ * a backing image. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. -+ */ -+void bdrv_query_image_info(BlockDriverState *bs, -+ ImageInfo **p_info, -+ Error **errp) -+{ -+ ImageInfo *info; -+ ERRP_GUARD(); -+ -+ info = g_new0(ImageInfo, 1); -+ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); -+ if (*errp) { -+ qapi_free_ImageInfo(info); -+ return; -+ } -+ -+ *p_info = info; -+} -+ - /* @p_info will be set only on success. 
*/ - static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, - Error **errp) -diff --git a/include/block/qapi.h b/include/block/qapi.h -index c09859ea78..c7de4e3fa9 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - int bdrv_query_snapshot_info_list(BlockDriverState *bs, - SnapshotInfoList **p_list, - Error **errp); -+void bdrv_query_block_node_info(BlockDriverState *bs, -+ BlockNodeInfo **p_info, -+ Error **errp); - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, - Error **errp); -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 4b9365167f..7720da0498 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -251,7 +251,7 @@ - } } - - ## --# @ImageInfo: -+# @BlockNodeInfo: - # - # Information about a QEMU image file - # -@@ -279,22 +279,34 @@ - # - # @snapshots: list of VM snapshots - # --# @backing-image: info of the backing image (since 1.6) --# - # @format-specific: structure supplying additional format-specific - # information (since 1.7) - # --# Since: 1.3 -+# Since: 8.0 - ## --{ 'struct': 'ImageInfo', -+{ 'struct': 'BlockNodeInfo', - 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', - '*actual-size': 'int', 'virtual-size': 'int', - '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool', - '*backing-filename': 'str', '*full-backing-filename': 'str', - '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], -- '*backing-image': 'ImageInfo', - '*format-specific': 'ImageInfoSpecific' } } - -+## -+# @ImageInfo: -+# -+# Information about a QEMU image file, and potentially its backing image -+# -+# @backing-image: info of the backing image -+# -+# Since: 1.3 -+## -+{ 'struct': 'ImageInfo', -+ 'base': 'BlockNodeInfo', -+ 'data': { -+ '*backing-image': 'ImageInfo' -+ } } -+ - ## - # @ImageCheck: - # --- -2.31.1 - diff --git 
a/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch b/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch new file mode 100644 index 0000000..0f0347b --- /dev/null +++ b/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch @@ -0,0 +1,386 @@ +From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 4 May 2023 13:57:33 +0200 +Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine + context + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm) + +These functions must not be called in coroutine context, because they +need write access to the graph. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-Id: <20230504115750.54437-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786) +Signed-off-by: Kevin Wolf +--- + block.c | 2 +- + block/crypto.c | 6 +++--- + block/parallels.c | 6 +++--- + block/qcow.c | 6 +++--- + block/qcow2.c | 14 +++++++------- + block/qed.c | 6 +++--- + block/vdi.c | 6 +++--- + block/vhdx.c | 6 +++--- + block/vmdk.c | 18 +++++++++--------- + block/vpc.c | 6 +++--- + include/block/block-global-state.h | 3 ++- + include/sysemu/block-backend-global-state.h | 5 ++++- + 12 files changed, 44 insertions(+), 40 deletions(-) + +diff --git a/block.c b/block.c +index d79a52ca74..a48112f945 100644 +--- a/block.c ++++ b/block.c +@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, + + ret = 0; + out: +- blk_unref(blk); ++ blk_co_unref(blk); + return ret; + } + +diff --git a/block/crypto.c b/block/crypto.c +index 
ca67289187..8fd3ad0054 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, + ret = 0; + cleanup: + qcrypto_block_free(crypto); +- blk_unref(blk); ++ blk_co_unref(blk); + return ret; + } + +@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) + + ret = 0; + fail: +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -730,7 +730,7 @@ fail: + bdrv_co_delete_file_noerr(bs); + } + +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_QCryptoBlockCreateOptions(create_opts); + qobject_unref(cryptoopts); + return ret; +diff --git a/block/parallels.c b/block/parallels.c +index 013684801a..b49c35929e 100644 +--- a/block/parallels.c ++++ b/block/parallels.c +@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, + + ret = 0; + out: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + + exit: +@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename, + + done: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qcow.c b/block/qcow.c +index 490e4f819e..a0c701f578 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, + g_free(tmp); + ret = 0; + exit: +- blk_unref(qcow_blk); +- bdrv_unref(bs); ++ blk_co_unref(qcow_blk); ++ bdrv_co_unref(bs); + qcrypto_block_free(crypto); + return ret; + } +@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename, + fail: + g_free(backing_fmt); + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qcow2.c b/block/qcow2.c +index 22084730f9..0b8beb8b47 100644 +--- a/block/qcow2.c ++++ 
b/block/qcow2.c +@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + goto out; + } + +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + + /* +@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + } + } + +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + + /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. +@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + + ret = 0; + out: +- blk_unref(blk); +- bdrv_unref(bs); +- bdrv_unref(data_bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); ++ bdrv_co_unref(data_bs); + return ret; + } + +@@ -3949,8 +3949,8 @@ finish: + } + + qobject_unref(qdict); +- bdrv_unref(bs); +- bdrv_unref(data_bs); ++ bdrv_co_unref(bs); ++ bdrv_co_unref(data_bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qed.c b/block/qed.c +index 0705a7b4e2..aff2a2076e 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, + ret = 0; /* success */ + out: + g_free(l1_table); +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/vdi.c b/block/vdi.c +index f2434d6153..08331d2dd7 100644 +--- a/block/vdi.c ++++ b/block/vdi.c +@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, + + ret = 0; + exit: +- blk_unref(blk); +- bdrv_unref(bs_file); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs_file); + g_free(bmap); + return ret; + } +@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename, + done: + qobject_unref(qdict); + 
qapi_free_BlockdevCreateOptions(create_options); +- bdrv_unref(bs_file); ++ bdrv_co_unref(bs_file); + return ret; + } + +diff --git a/block/vhdx.c b/block/vhdx.c +index 81420722a1..00777da91a 100644 +--- a/block/vhdx.c ++++ b/block/vhdx.c +@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, + + ret = 0; + delete_and_exit: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + g_free(creator); + return ret; + } +@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/vmdk.c b/block/vmdk.c +index f5f49018fe..01ca13c82b 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2306,7 +2306,7 @@ exit: + if (pbb) { + *pbb = blk; + } else { +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + } + } +@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size, + if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { + error_setg(errp, "Invalid backing file format: %s. 
Must be vmdk", + blk_bs(backing)->drv->format_name); +- blk_unref(backing); ++ blk_co_unref(backing); + ret = -EINVAL; + goto exit; + } + ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); +- blk_unref(backing); ++ blk_co_unref(backing); + if (ret) { + error_setg(errp, "Failed to read parent CID"); + goto exit; +@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size, + blk_bs(extent_blk)->filename); + created_size += cur_size; + extent_idx++; +- blk_unref(extent_blk); ++ blk_co_unref(extent_blk); + } + + /* Check whether we got excess extents */ + extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, + opaque, NULL); + if (extent_blk) { +- blk_unref(extent_blk); ++ blk_co_unref(extent_blk); + error_setg(errp, "List of extents contains unused extents"); + ret = -EINVAL; + goto exit; +@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size, + ret = 0; + exit: + if (blk) { +- blk_unref(blk); ++ blk_co_unref(blk); + } + g_free(desc); + g_free(parent_desc_line); +@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, + errp)) { + goto exit; + } +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + exit: + g_free(ext_filename); + return blk; +@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, + return NULL; + } + blk_set_allow_write_beyond_eof(blk, true); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + + if (size != -1) { + ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); + if (ret) { +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + } + } +diff --git a/block/vpc.c b/block/vpc.c +index b89b0ff8e2..07ddda5b99 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, + } + + out: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + 
qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index 399200a9a3..cd4ea554bf 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt, + bool quiet, Error **errp); + + void bdrv_ref(BlockDriverState *bs); +-void bdrv_unref(BlockDriverState *bs); ++void no_coroutine_fn bdrv_unref(BlockDriverState *bs); ++void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs); + void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); + BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, +diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h +index 2b6d27db7c..fa83f9389c 100644 +--- a/include/sysemu/block-backend-global-state.h ++++ b/include/sysemu/block-backend-global-state.h +@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options, + + int blk_get_refcnt(BlockBackend *blk); + void blk_ref(BlockBackend *blk); +-void blk_unref(BlockBackend *blk); ++ ++void no_coroutine_fn blk_unref(BlockBackend *blk); ++void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); ++ + void blk_remove_all_bs(void); + BlockBackend *blk_by_name(const char *name); + BlockBackend *blk_next(BlockBackend *blk); +-- +2.39.1 + diff --git a/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch b/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch new file mode 100644 index 0000000..caf6694 --- /dev/null +++ b/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch @@ -0,0 +1,74 @@ +From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 26 Jul 2023 09:48:07 +0200 +Subject: [PATCH 02/14] 
block/blkio: do not use open flags in qemu_open() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s) + +qemu_open() in blkio_virtio_blk_common_open() is used to open the +character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in +the future eventually the unix socket. + +In all these cases we cannot open the path in read-only mode, +when the `read-only` option of blockdev is on, because the exchange +of IOCTL commands for example will fail. + +In order to open the device read-only, we have to use the `read-only` +property of the libblkio driver as we already do in blkio_file_open(). + +Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439 +Reported-by: Qing Wang +Signed-off-by: Stefano Garzarella +Reviewed-by: Daniel P. Berrangé +Message-id: 20230726074807.14041-1-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 3ea9841bd8..5a82c6cb1a 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, + * layer through the "/dev/fdset/N" special path. + */ + if (fd_supported) { +- int open_flags; +- +- if (flags & BDRV_O_RDWR) { +- open_flags = O_RDWR; +- } else { +- open_flags = O_RDONLY; +- } +- +- fd = qemu_open(path, open_flags, errp); ++ /* ++ * `path` can contain the path of a character device ++ * (e.g. 
/dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket. ++ * ++ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR ++ * is not set in the open flags, because the exchange of IOCTL commands ++ * for example will fail. ++ * ++ * In order to open the device read-only, we are using the `read-only` ++ * property of the libblkio driver in blkio_file_open(). ++ */ ++ fd = qemu_open(path, O_RDWR, errp); + if (fd < 0) { + return -EINVAL; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch b/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch new file mode 100644 index 0000000..8a6f72b --- /dev/null +++ b/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch @@ -0,0 +1,54 @@ +From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 25 Jul 2023 12:37:44 +0200 +Subject: [PATCH 01/14] block/blkio: enable the completion eventfd + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s) + +Until libblkio 1.3.0, virtio-blk drivers had completion eventfd +notifications enabled from the start, but from the next releases +this is no longer the case, so we have to explicitly enable them. + +In fact, the libblkio documentation says they could be disabled, +so we should always enable them at the start if we want to be +sure to get completion eventfd notifications: + + By default, the driver might not generate completion events for + requests so it is necessary to explicitly enable the completion + file descriptor before use: + + void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable); + +I discovered this while trying a development version of libblkio: +the guest kernel hangs during boot, while probing the device. 
+ +Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") +Signed-off-by: Stefano Garzarella +Message-id: 20230725103744.77343-1-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/blkio.c b/block/blkio.c +index afcec359f2..3ea9841bd8 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + QLIST_INIT(&s->bounce_bufs); + s->blkioq = blkio_get_queue(s->blkio, 0); + s->completion_fd = blkioq_get_completion_fd(s->blkioq); ++ blkioq_set_completion_fd_enabled(s->blkioq, true); + + blkio_attach_aio_context(bs, bdrv_get_aio_context(bs)); + return 0; +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch b/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch new file mode 100644 index 0000000..f4d6e3c --- /dev/null +++ b/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch @@ -0,0 +1,67 @@ +From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 27 Jul 2023 18:10:19 +0200 +Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd` + setting fails + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s) + +qemu_open() fails if called with an unix domain socket in this way: + -blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address + +Since virtio-blk-vhost-user does not support fd passing, let`s always fall back +on using 
`path` if we fail the fd passing. + +Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") +Reported-by: Qing Wang +Signed-off-by: Stefano Garzarella +Message-id: 20230727161020.84213-4-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 93a8f8fc5c..eef80e9ce5 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, + * In order to open the device read-only, we are using the `read-only` + * property of the libblkio driver in blkio_file_open(). + */ +- fd = qemu_open(path, O_RDWR, errp); ++ fd = qemu_open(path, O_RDWR, NULL); + if (fd < 0) { +- return -EINVAL; ++ fd_supported = false; ++ } else { ++ ret = blkio_set_int(s->blkio, "fd", fd); ++ if (ret < 0) { ++ fd_supported = false; ++ qemu_close(fd); ++ } + } ++ } + +- ret = blkio_set_int(s->blkio, "fd", fd); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "failed to set fd: %s", +- blkio_get_error_msg()); +- qemu_close(fd); +- return ret; +- } +- } else { ++ if (!fd_supported) { + ret = blkio_set_str(s->blkio, "path", path); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set path: %s", +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch b/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch new file mode 100644 index 0000000..1c89a0b --- /dev/null +++ b/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch @@ -0,0 +1,205 @@ +From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 4 Jul 2023 14:34:36 +0200 +Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 181: block/blkio: fix 
module_block.py parsing +RH-Bugzilla: 2213317 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm) + +When QEMU is built with --enable-modules, the module_block.py script +parses block/*.c to find block drivers that are built as modules. The +script generates a table of block drivers called block_driver_modules[]. +This table is used for block driver module loading. + +The blkio.c driver uses macros to define its BlockDriver structs. This +was done to avoid code duplication but the module_block.py script is +unable to parse the macro. The result is that libblkio-based block +drivers can be built as modules but will not be found at runtime. + +One fix is to make the module_block.py script or build system fancier so +it can parse C macros (e.g. by parsing the preprocessed source code). I +chose not to do this because it raises the complexity of the build, +making future issues harder to debug. + +Keep things simple: use the macro to avoid duplicating BlockDriver +function pointers but define .format_name and .protocol_name manually +for each BlockDriver. This way the module_block.py is able to parse the +code. + +Also get rid of the block driver name macros (e.g. DRIVER_IO_URING) +because module_block.py cannot parse them either. + +Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") +Reported-by: Qing Wang +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20230704123436.187761-1-stefanha@redhat.com +Cc: Stefano Garzarella +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9) + +Conflicts: +- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to + blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback. 
+ +Signed-off-by: Stefan Hajnoczi +--- + block/blkio.c | 118 ++++++++++++++++++++++++++------------------------ + 1 file changed, 61 insertions(+), 57 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 6a6f20f923..afcec359f2 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -21,16 +21,6 @@ + + #include "block/block-io.h" + +-/* +- * Keep the QEMU BlockDriver names identical to the libblkio driver names. +- * Using macros instead of typing out the string literals avoids typos. +- */ +-#define DRIVER_IO_URING "io_uring" +-#define DRIVER_NVME_IO_URING "nvme-io_uring" +-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci" +-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user" +-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa" +- + /* + * Allocated bounce buffers are kept in a list sorted by buffer address. + */ +@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + return ret; + } + +- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) { ++ if (strcmp(blkio_driver, "io_uring") == 0) { + ret = blkio_io_uring_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) { ++ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { + ret = blkio_nvme_io_uring(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); + } else { + g_assert_not_reached(); +@@ -1027,50 +1017,64 @@ static 
void blkio_refresh_limits(BlockDriverState *bs, Error **errp) + * - truncate + */ + +-#define BLKIO_DRIVER(name, ...) \ +- { \ +- .format_name = name, \ +- .protocol_name = name, \ +- .instance_size = sizeof(BDRVBlkioState), \ +- .bdrv_file_open = blkio_file_open, \ +- .bdrv_close = blkio_close, \ +- .bdrv_co_getlength = blkio_co_getlength, \ +- .bdrv_co_truncate = blkio_truncate, \ +- .bdrv_co_get_info = blkio_co_get_info, \ +- .bdrv_attach_aio_context = blkio_attach_aio_context, \ +- .bdrv_detach_aio_context = blkio_detach_aio_context, \ +- .bdrv_co_pdiscard = blkio_co_pdiscard, \ +- .bdrv_co_preadv = blkio_co_preadv, \ +- .bdrv_co_pwritev = blkio_co_pwritev, \ +- .bdrv_co_flush_to_disk = blkio_co_flush, \ +- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ +- .bdrv_co_io_unplug = blkio_co_io_unplug, \ +- .bdrv_refresh_limits = blkio_refresh_limits, \ +- .bdrv_register_buf = blkio_register_buf, \ +- .bdrv_unregister_buf = blkio_unregister_buf, \ +- __VA_ARGS__ \ +- } +- +-static BlockDriver bdrv_io_uring = BLKIO_DRIVER( +- DRIVER_IO_URING, +- .bdrv_needs_filename = true, +-); +- +-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER( +- DRIVER_NVME_IO_URING, +-); +- +-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VFIO_PCI +-); ++/* ++ * Do not include .format_name and .protocol_name because module_block.py ++ * does not parse macros in the source code. 
++ */ ++#define BLKIO_DRIVER_COMMON \ ++ .instance_size = sizeof(BDRVBlkioState), \ ++ .bdrv_file_open = blkio_file_open, \ ++ .bdrv_close = blkio_close, \ ++ .bdrv_co_getlength = blkio_co_getlength, \ ++ .bdrv_co_truncate = blkio_truncate, \ ++ .bdrv_co_get_info = blkio_co_get_info, \ ++ .bdrv_attach_aio_context = blkio_attach_aio_context, \ ++ .bdrv_detach_aio_context = blkio_detach_aio_context, \ ++ .bdrv_co_pdiscard = blkio_co_pdiscard, \ ++ .bdrv_co_preadv = blkio_co_preadv, \ ++ .bdrv_co_pwritev = blkio_co_pwritev, \ ++ .bdrv_co_flush_to_disk = blkio_co_flush, \ ++ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ ++ .bdrv_co_io_unplug = blkio_co_io_unplug, \ ++ .bdrv_refresh_limits = blkio_refresh_limits, \ ++ .bdrv_register_buf = blkio_register_buf, \ ++ .bdrv_unregister_buf = blkio_unregister_buf, + +-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VHOST_USER +-); ++/* ++ * Use the same .format_name and .protocol_name as the libblkio driver name for ++ * consistency. 
++ */ + +-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VHOST_VDPA +-); ++static BlockDriver bdrv_io_uring = { ++ .format_name = "io_uring", ++ .protocol_name = "io_uring", ++ .bdrv_needs_filename = true, ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_nvme_io_uring = { ++ .format_name = "nvme-io_uring", ++ .protocol_name = "nvme-io_uring", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vfio_pci = { ++ .format_name = "virtio-blk-vfio-pci", ++ .protocol_name = "virtio-blk-vfio-pci", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vhost_user = { ++ .format_name = "virtio-blk-vhost-user", ++ .protocol_name = "virtio-blk-vhost-user", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vhost_vdpa = { ++ .format_name = "virtio-blk-vhost-vdpa", ++ .protocol_name = "virtio-blk-vhost-vdpa", ++ BLKIO_DRIVER_COMMON ++}; + + static void bdrv_blkio_init(void) + { +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch b/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch new file mode 100644 index 0000000..e3ec1ee --- /dev/null +++ b/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch @@ -0,0 +1,151 @@ +From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 27 Jul 2023 18:10:17 +0200 +Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers + functions + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s) + +This is in preparation for the next patch, where for virtio-blk +drivers we need to handle the failure of blkio_connect(). 
+ +Let's also rename the *_open() functions to *_connect() to make +the code reflect the changes applied. + +Signed-off-by: Stefano Garzarella +Message-id: 20230727161020.84213-2-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 67 ++++++++++++++++++++++++++++++--------------------- + 1 file changed, 40 insertions(+), 27 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 5a82c6cb1a..85d1eed5fb 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size) + } + } + +-static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, +- Error **errp) ++static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options, ++ int flags, Error **errp) + { + const char *filename = qdict_get_str(options, "filename"); + BDRVBlkioState *s = bs->opaque; +@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, + } + } + ++ ret = blkio_connect(s->blkio); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "blkio_connect failed: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ + return 0; + } + +-static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, +- Error **errp) ++static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options, ++ int flags, Error **errp) + { + const char *path = qdict_get_try_str(options, "path"); + BDRVBlkioState *s = bs->opaque; +@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, + return -EINVAL; + } + ++ ret = blkio_connect(s->blkio); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "blkio_connect failed: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ + return 0; + } + +-static int blkio_virtio_blk_common_open(BlockDriverState *bs, +- QDict *options, int flags, 
Error **errp) ++static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, ++ int flags, Error **errp) + { + const char *path = qdict_get_try_str(options, "path"); + BDRVBlkioState *s = bs->opaque; +@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, + } + } + ++ ret = blkio_connect(s->blkio); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "blkio_connect failed: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ + qdict_del(options, "path"); + + return 0; +@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + return ret; + } + +- if (strcmp(blkio_driver, "io_uring") == 0) { +- ret = blkio_io_uring_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { +- ret = blkio_nvme_io_uring(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { +- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { +- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { +- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else { +- g_assert_not_reached(); +- } +- if (ret < 0) { +- blkio_destroy(&s->blkio); +- return ret; +- } +- + if (!(flags & BDRV_O_RDWR)) { + ret = blkio_set_bool(s->blkio, "read-only", true); + if (ret < 0) { +@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + } + } + +- ret = blkio_connect(s->blkio); ++ if (strcmp(blkio_driver, "io_uring") == 0) { ++ ret = blkio_io_uring_connect(bs, options, flags, errp); ++ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { ++ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp); ++ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { ++ ret = blkio_virtio_blk_connect(bs, options, flags, errp); ++ } else if (strcmp(blkio_driver, 
"virtio-blk-vhost-user") == 0) { ++ ret = blkio_virtio_blk_connect(bs, options, flags, errp); ++ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { ++ ret = blkio_virtio_blk_connect(bs, options, flags, errp); ++ } else { ++ g_assert_not_reached(); ++ } + if (ret < 0) { +- error_setg_errno(errp, -ret, "blkio_connect failed: %s", +- blkio_get_error_msg()); + blkio_destroy(&s->blkio); + return ret; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch b/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch new file mode 100644 index 0000000..5ec9e0b --- /dev/null +++ b/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch @@ -0,0 +1,85 @@ +From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 27 Jul 2023 18:10:18 +0200 +Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using + `fd` + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s) + +libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa +driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use +qemu_open() to support fd passing for virtio-blk") we are using +`blkio_get_int(..., "fd")` to check if the "fd" property is supported +for all the virtio-blk-* driver. + +Unfortunately that property is also available for those driver that do +not support it, such as virtio-blk-vhost-user. + +So, `blkio_get_int()` is not enough to check whether the driver supports +the `fd` property or not. 
This is because the virito-blk common libblkio +driver only checks whether or not `fd` is set during `blkio_connect()` +and fails with -EINVAL for those transports that do not support it +(all except vhost-vdpa for now). + +So let's handle the `blkio_connect()` failure, retrying it using `path` +directly. + +Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") +Suggested-by: Stefan Hajnoczi +Signed-off-by: Stefano Garzarella +Message-id: 20230727161020.84213-3-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/block/blkio.c b/block/blkio.c +index 85d1eed5fb..93a8f8fc5c 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, + } + + ret = blkio_connect(s->blkio); ++ /* ++ * If the libblkio driver doesn't support the `fd` property, blkio_connect() ++ * will fail with -EINVAL. So let's try calling blkio_connect() again by ++ * directly setting `path`. ++ */ ++ if (fd_supported && ret == -EINVAL) { ++ qemu_close(fd); ++ ++ /* ++ * We need to clear the `fd` property we set previously by setting ++ * it to -1. 
++ */ ++ ret = blkio_set_int(s->blkio, "fd", -1); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set fd: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ ++ ret = blkio_set_str(s->blkio, "path", path); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set path: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ ++ ret = blkio_connect(s->blkio); ++ } ++ + if (ret < 0) { + error_setg_errno(errp, -ret, "blkio_connect failed: %s", + blkio_get_error_msg()); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch b/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch new file mode 100644 index 0000000..c6e1cd8 --- /dev/null +++ b/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch @@ -0,0 +1,49 @@ +From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 27 Jul 2023 18:10:20 +0200 +Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd + support + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s) + +Setting the `fd` property fails with virtio-blk-* libblkio drivers +that do not support fd passing since +https://gitlab.com/libblkio/libblkio/-/merge_requests/208. + +Getting the `fd` property, on the other hand, always succeeds for +virtio-blk-* libblkio drivers even when they don't support fd passing. + +This patch switches to setting the `fd` property because it is a +better mechanism for probing fd passing support than getting the `fd` +property. 
+ +Signed-off-by: Stefano Garzarella +Message-id: 20230727161020.84213-5-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blkio.c b/block/blkio.c +index eef80e9ce5..8defbf744f 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, + return -EINVAL; + } + +- if (blkio_get_int(s->blkio, "fd", &fd) == 0) { ++ if (blkio_set_int(s->blkio, "fd", -1) == 0) { + fd_supported = true; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch b/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch new file mode 100644 index 0000000..3b32299 --- /dev/null +++ b/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch @@ -0,0 +1,108 @@ +From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 30 May 2023 09:19:40 +0200 +Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for + virtio-blk + +RH-Author: Stefano Garzarella +RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver +RH-Bugzilla: 2180076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s) + +Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) supports the fd +passing. Let's expose this to the user, so the management layer +can pass the file descriptor of an already opened path. + +If the libblkio virtio-blk driver supports fd passing, let's always +use qemu_open() to open the `path`, so we can handle fd passing +from the management layer through the "/dev/fdset/N" special path. 
+ +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Stefano Garzarella +Message-id: 20230530071941.8954-2-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 44 insertions(+), 9 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 0cdc99a729..6a6f20f923 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, + { + const char *path = qdict_get_try_str(options, "path"); + BDRVBlkioState *s = bs->opaque; +- int ret; ++ bool fd_supported = false; ++ int fd, ret; + + if (!path) { + error_setg(errp, "missing 'path' option"); + return -EINVAL; + } + +- ret = blkio_set_str(s->blkio, "path", path); +- qdict_del(options, "path"); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "failed to set path: %s", +- blkio_get_error_msg()); +- return ret; +- } +- + if (!(flags & BDRV_O_NOCACHE)) { + error_setg(errp, "cache.direct=off is not supported"); + return -EINVAL; + } ++ ++ if (blkio_get_int(s->blkio, "fd", &fd) == 0) { ++ fd_supported = true; ++ } ++ ++ /* ++ * If the libblkio driver supports fd passing, let's always use qemu_open() ++ * to open the `path`, so we can handle fd passing from the management ++ * layer through the "/dev/fdset/N" special path. 
++ */ ++ if (fd_supported) { ++ int open_flags; ++ ++ if (flags & BDRV_O_RDWR) { ++ open_flags = O_RDWR; ++ } else { ++ open_flags = O_RDONLY; ++ } ++ ++ fd = qemu_open(path, open_flags, errp); ++ if (fd < 0) { ++ return -EINVAL; ++ } ++ ++ ret = blkio_set_int(s->blkio, "fd", fd); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set fd: %s", ++ blkio_get_error_msg()); ++ qemu_close(fd); ++ return ret; ++ } ++ } else { ++ ret = blkio_set_str(s->blkio, "path", path); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set path: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ } ++ ++ qdict_del(options, "path"); ++ + return 0; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch b/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch new file mode 100644 index 0000000..b6eebf3 --- /dev/null +++ b/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch @@ -0,0 +1,121 @@ +From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 1 May 2023 13:34:43 -0400 +Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by + default + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm) + +reader_count() is a performance bottleneck because the global +aio_context_list_lock mutex causes thread contention. Put this debugging +assertion behind a new ./configure --enable-debug-graph-lock option and +disable it by default. + +The --enable-debug-graph-lock option is also enabled by the more general +--enable-debug option. 
+ +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230501173443.153062-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf) +Signed-off-by: Kevin Wolf +--- + block/graph-lock.c | 3 +++ + configure | 1 + + meson.build | 2 ++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 4 ++++ + 5 files changed, 12 insertions(+) + +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 454c31e691..259a7a0bde 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void) + + void assert_bdrv_graph_readable(void) + { ++ /* reader_count() is slow due to aio_context_list_lock lock contention */ ++#ifdef CONFIG_DEBUG_GRAPH_LOCK + assert(qemu_in_main_thread() || reader_count()); ++#endif + } + + void assert_bdrv_graph_writable(void) +diff --git a/configure b/configure +index 800b5850f4..a62a3e6be9 100755 +--- a/configure ++++ b/configure +@@ -806,6 +806,7 @@ for opt do + --enable-debug) + # Enable debugging options that aren't excessively noisy + debug_tcg="yes" ++ meson_option_parse --enable-debug-graph-lock "" + meson_option_parse --enable-debug-mutex "" + meson_option_add -Doptimization=0 + fortify_source="no" +diff --git a/meson.build b/meson.build +index c44d05a13f..d964e741e7 100644 +--- a/meson.build ++++ b/meson.build +@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool + have_coroutine_pool = false + endif + config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) ++config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) + config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) + config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) + config_host_data.set('CONFIG_GPROF', get_option('gprof')) +@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')} + summary_info += {'static build': 
config_host.has_key('CONFIG_STATIC')} + summary_info += {'malloc trim support': has_malloc_trim} + summary_info += {'membarrier': have_membarrier} ++summary_info += {'debug graph lock': get_option('debug_graph_lock')} + summary_info += {'debug stack usage': get_option('debug_stack_usage')} + summary_info += {'mutex debugging': get_option('debug_mutex')} + summary_info += {'memory allocator': get_option('malloc')} +diff --git a/meson_options.txt b/meson_options.txt +index fc9447d267..bc857fe68b 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false, + description: 'dummy RNG, avoid using /dev/(u)random and getrandom()') + option('coroutine_pool', type: 'boolean', value: true, + description: 'coroutine freelist (better performance)') ++option('debug_graph_lock', type: 'boolean', value: false, ++ description: 'graph lock debugging support') + option('debug_mutex', type: 'boolean', value: false, + description: 'mutex debugging support') + option('debug_stack_usage', type: 'boolean', value: false, +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 009fab1515..30e1f25259 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -21,6 +21,8 @@ meson_options_help() { + printf "%s\n" ' QEMU' + printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' + printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' ++ printf "%s\n" ' --enable-debug-graph-lock' ++ printf "%s\n" ' graph lock debugging support' + printf "%s\n" ' --enable-debug-mutex mutex debugging support' + printf "%s\n" ' --enable-debug-stack-usage' + printf "%s\n" ' measure coroutine stack usage' +@@ -249,6 +251,8 @@ _meson_option_parse() { + --datadir=*) quote_sh "-Ddatadir=$2" ;; + --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;; + --disable-dbus-display) printf "%s" -Ddbus_display=disabled ;; ++ --enable-debug-graph-lock) printf "%s" 
-Ddebug_graph_lock=true ;; ++ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;; + --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;; + --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;; + --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;; +-- +2.39.3 + diff --git a/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch b/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch deleted file mode 100644 index 19d52b5..0000000 --- a/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch +++ /dev/null @@ -1,70 +0,0 @@ -From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Mon, 7 Nov 2022 19:35:56 +0300 -Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s) - -Drop this simple wrapper used only in one place. We have too many graph -modifying functions even without it. 
- -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264) -Signed-off-by: Stefano Garzarella ---- - block.c | 15 +-------------- - 1 file changed, 1 insertion(+), 14 deletions(-) - -diff --git a/block.c b/block.c -index a18f052374..ec184150a2 100644 ---- a/block.c -+++ b/block.c -@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs); - static void bdrv_remove_child(BdrvChild *child, Transaction *tran); --static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, -- Transaction *tran); - - static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, -@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) - tran_add(tran, &bdrv_remove_child_drv, child); - } - --/* -- * A function to remove backing-chain child of @bs if exists: cow child for -- * format nodes (always .backing) and filter child for filters (may be .file or -- * .backing) -- */ --static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, -- Transaction *tran) --{ -- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); --} -- - static int bdrv_replace_node_noperm(BlockDriverState *from, - BlockDriverState *to, - bool auto_skip, Transaction *tran, -@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, - } - - if (detach_subchain) { -- bdrv_remove_filter_or_cow_child(to_cow_parent, tran); -+ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); - } - - found = g_hash_table_new(NULL, NULL); --- -2.31.1 - diff --git a/SOURCES/kvm-block-file-Add-file-specific-image-info.patch b/SOURCES/kvm-block-file-Add-file-specific-image-info.patch deleted file mode 100644 index a81b6b0..0000000 --- 
a/SOURCES/kvm-block-file-Add-file-specific-image-info.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:54 +0200 -Subject: [PATCH 07/20] block/file: Add file-specific image info - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s) - -Add some (optional) information that the file driver can provide for -image files, namely the extent size hint. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-3-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0) -Signed-off-by: Hanna Czenczek ---- - block/file-posix.c | 30 ++++++++++++++++++++++++++++++ - qapi/block-core.json | 26 ++++++++++++++++++++++++-- - 2 files changed, 54 insertions(+), 2 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index b9647c5ffc..df3da79aed 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) - return 0; - } - -+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs, -+ Error **errp) -+{ -+ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1); -+ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); -+ -+ *spec_info = (ImageInfoSpecific){ -+ .type = IMAGE_INFO_SPECIFIC_KIND_FILE, -+ .u.file.data = file_info, -+ }; -+ -+#ifdef FS_IOC_FSGETXATTR -+ { -+ BDRVRawState *s = bs->opaque; -+ struct fsxattr attr; -+ int ret; -+ -+ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr); -+ if (!ret && attr.fsx_extsize != 0) { -+ file_info->has_extent_size_hint = true; -+ file_info->extent_size_hint = attr.fsx_extsize; -+ } 
-+ } -+#endif -+ -+ return spec_info; -+} -+ - static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs) - { - BDRVRawState *s = bs->opaque; -@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = { - .bdrv_co_truncate = raw_co_truncate, - .bdrv_getlength = raw_getlength, - .bdrv_get_info = raw_get_info, -+ .bdrv_get_specific_info = raw_get_specific_info, - .bdrv_get_allocated_file_size - = raw_get_allocated_file_size, - .bdrv_get_specific_stats = raw_get_specific_stats, -@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = { - .bdrv_co_truncate = raw_co_truncate, - .bdrv_getlength = raw_getlength, - .bdrv_get_info = raw_get_info, -+ .bdrv_get_specific_info = raw_get_specific_info, - .bdrv_get_allocated_file_size - = raw_get_allocated_file_size, - .bdrv_get_specific_stats = hdev_get_specific_stats, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 95ac4fa634..f5d822cbd6 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -139,16 +139,29 @@ - '*encryption-format': 'RbdImageEncryptionFormat' - } } - -+## -+# @ImageInfoSpecificFile: -+# -+# @extent-size-hint: Extent size hint (if available) -+# -+# Since: 8.0 -+## -+{ 'struct': 'ImageInfoSpecificFile', -+ 'data': { -+ '*extent-size-hint': 'size' -+ } } -+ - ## - # @ImageInfoSpecificKind: - # - # @luks: Since 2.7 - # @rbd: Since 6.1 -+# @file: Since 8.0 - # - # Since: 1.7 - ## - { 'enum': 'ImageInfoSpecificKind', -- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] } -+ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] } - - ## - # @ImageInfoSpecificQCow2Wrapper: -@@ -185,6 +198,14 @@ - { 'struct': 'ImageInfoSpecificRbdWrapper', - 'data': { 'data': 'ImageInfoSpecificRbd' } } - -+## -+# @ImageInfoSpecificFileWrapper: -+# -+# Since: 8.0 -+## -+{ 'struct': 'ImageInfoSpecificFileWrapper', -+ 'data': { 'data': 'ImageInfoSpecificFile' } } -+ - ## - # @ImageInfoSpecific: - # -@@ -199,7 +220,8 @@ - 'qcow2': 'ImageInfoSpecificQCow2Wrapper', - 'vmdk': 'ImageInfoSpecificVmdkWrapper', - 
'luks': 'ImageInfoSpecificLUKSWrapper', -- 'rbd': 'ImageInfoSpecificRbdWrapper' -+ 'rbd': 'ImageInfoSpecificRbdWrapper', -+ 'file': 'ImageInfoSpecificFileWrapper' - } } - - ## --- -2.31.1 - diff --git a/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch b/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch deleted file mode 100644 index 62979ef..0000000 --- a/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch +++ /dev/null @@ -1,206 +0,0 @@ -From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:00 +0200 -Subject: [PATCH 13/20] block/qapi: Add indentation to bdrv_node_info_dump() - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s) - -In order to let qemu-img info present a block graph, add a parameter to -bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the -information of nodes below the root level can be given an indentation. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-9-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 47 +++++++++++++++++++--------------- - include/block/qapi.h | 5 ++-- - qemu-img.c | 2 +- - qemu-io-cmds.c | 3 ++- - 5 files changed, 34 insertions(+), 25 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index aa37faa601..72824d4e2e 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index f208c21ccf..3e35603f0c 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj) - * prepending an optional prefix if the dump is not empty. 
- */ - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -- const char *prefix) -+ const char *prefix, -+ int indentation) - { - QObject *obj, *data; - Visitor *v = qobject_output_visitor_new(&obj); -@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - data = qdict_get(qobject_to(QDict, obj), "data"); - if (!qobject_is_empty_dump(data)) { - if (prefix) { -- qemu_printf("%s", prefix); -+ qemu_printf("%*s%s", indentation * 4, "", prefix); - } -- dump_qobject(1, data); -+ dump_qobject(indentation + 1, data); - } - qobject_unref(obj); - visit_free(v); - } - --void bdrv_node_info_dump(BlockNodeInfo *info) -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) - { - char *size_buf, *dsize_buf; -+ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); -+ - if (!info->has_actual_size) { - dsize_buf = g_strdup("unavailable"); - } else { - dsize_buf = size_to_str(info->actual_size); - } - size_buf = size_to_str(info->virtual_size); -- qemu_printf("image: %s\n" -- "file format: %s\n" -- "virtual size: %s (%" PRId64 " bytes)\n" -- "disk size: %s\n", -- info->filename, info->format, size_buf, -- info->virtual_size, -- dsize_buf); -+ qemu_printf("%simage: %s\n" -+ "%sfile format: %s\n" -+ "%svirtual size: %s (%" PRId64 " bytes)\n" -+ "%sdisk size: %s\n", -+ ind_s, info->filename, -+ ind_s, info->format, -+ ind_s, size_buf, info->virtual_size, -+ ind_s, dsize_buf); - g_free(size_buf); - g_free(dsize_buf); - - if (info->has_encrypted && info->encrypted) { -- qemu_printf("encrypted: yes\n"); -+ qemu_printf("%sencrypted: yes\n", ind_s); - } - - if (info->has_cluster_size) { -- qemu_printf("cluster_size: %" PRId64 "\n", -- info->cluster_size); -+ qemu_printf("%scluster_size: %" PRId64 "\n", -+ ind_s, info->cluster_size); - } - - if (info->has_dirty_flag && info->dirty_flag) { -- qemu_printf("cleanly shut down: no\n"); -+ qemu_printf("%scleanly shut down: no\n", ind_s); - } - - if (info->has_backing_filename) 
{ -- qemu_printf("backing file: %s", info->backing_filename); -+ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename); - if (!info->has_full_backing_filename) { - qemu_printf(" (cannot determine actual path)"); - } else if (strcmp(info->backing_filename, -@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - } - qemu_printf("\n"); - if (info->has_backing_filename_format) { -- qemu_printf("backing file format: %s\n", -- info->backing_filename_format); -+ qemu_printf("%sbacking file format: %s\n", -+ ind_s, info->backing_filename_format); - } - } - - if (info->has_snapshots) { - SnapshotInfoList *elem; - -- qemu_printf("Snapshot list:\n"); -+ qemu_printf("%sSnapshot list:\n", ind_s); -+ qemu_printf("%s", ind_s); - bdrv_snapshot_dump(NULL); - qemu_printf("\n"); - -@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - - pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id); - pstrcpy(sn.name, sizeof(sn.name), elem->value->name); -+ qemu_printf("%s", ind_s); - bdrv_snapshot_dump(&sn); - qemu_printf("\n"); - } -@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - - if (info->has_format_specific) { - bdrv_image_info_specific_dump(info->format_specific, -- "Format specific information:\n"); -+ "Format specific information:\n", -+ indentation); - } - } -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 196436020e..38855f2ae9 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs, - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -- const char *prefix); --void bdrv_node_info_dump(BlockNodeInfo *info); -+ const char *prefix, -+ int indentation); -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); - #endif -diff --git a/qemu-img.c b/qemu-img.c -index 3b2ca3bbcb..30b4ea58bb 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2859,7 +2859,7 @@ 
static void dump_human_image_info_list(BlockNodeInfoList *list) - } - delim = true; - -- bdrv_node_info_dump(elem->value); -+ bdrv_node_info_dump(elem->value, 0); - } - } - -diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c -index f4a374528e..fdcb89211b 100644 ---- a/qemu-io-cmds.c -+++ b/qemu-io-cmds.c -@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) - } - if (spec_info) { - bdrv_image_info_specific_dump(spec_info, -- "Format specific information:\n"); -+ "Format specific information:\n", -+ 0); - qapi_free_ImageInfoSpecific(spec_info); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch b/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch deleted file mode 100644 index e9a1622..0000000 --- a/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch +++ /dev/null @@ -1,155 +0,0 @@ -From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:59 +0200 -Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s) - -Introduce a new QAPI type BlockGraphInfo and an associated -bdrv_query_block_graph_info() function that recursively gathers -BlockNodeInfo objects through a block graph. - -A follow-up patch is going to make "qemu-img info" use this to print -information about all nodes that are (usually implicitly) opened for a -given image file. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-8-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ - include/block/qapi.h | 3 +++ - qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++ - 3 files changed, 86 insertions(+) - -diff --git a/block/qapi.c b/block/qapi.c -index 5d0a8d2ce3..f208c21ccf 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -411,6 +411,54 @@ fail: - qapi_free_ImageInfo(info); - } - -+/** -+ * bdrv_query_block_graph_info: -+ * @bs: root node to start from -+ * @p_info: location to store image information -+ * @errp: location to store error information -+ * -+ * Store image information about the graph starting from @bs in @p_info. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. -+ */ -+void bdrv_query_block_graph_info(BlockDriverState *bs, -+ BlockGraphInfo **p_info, -+ Error **errp) -+{ -+ BlockGraphInfo *info; -+ BlockChildInfoList **children_list_tail; -+ BdrvChild *c; -+ ERRP_GUARD(); -+ -+ info = g_new0(BlockGraphInfo, 1); -+ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp); -+ if (*errp) { -+ goto fail; -+ } -+ -+ children_list_tail = &info->children; -+ -+ QLIST_FOREACH(c, &bs->children, next) { -+ BlockChildInfo *c_info; -+ -+ c_info = g_new0(BlockChildInfo, 1); -+ QAPI_LIST_APPEND(children_list_tail, c_info); -+ -+ c_info->name = g_strdup(c->name); -+ bdrv_query_block_graph_info(c->bs, &c_info->info, errp); -+ if (*errp) { -+ goto fail; -+ } -+ } -+ -+ *p_info = info; -+ return; -+ -+fail: -+ assert(*errp != NULL); -+ qapi_free_BlockGraphInfo(info); -+} -+ - /* @p_info will be set only on success. 
*/ - static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, - Error **errp) -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 2174bf8fa2..196436020e 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs, - bool flat, - bool skip_implicit_filters, - Error **errp); -+void bdrv_query_block_graph_info(BlockDriverState *bs, -+ BlockGraphInfo **p_info, -+ Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 4cf2deeb6c..d703e0fb16 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -307,6 +307,41 @@ - '*backing-image': 'ImageInfo' - } } - -+## -+# @BlockChildInfo: -+# -+# Information about all nodes in the block graph starting at some node, -+# annotated with information about that node in relation to its parent. -+# -+# @name: Child name of the root node in the BlockGraphInfo struct, in its role -+# as the child of some undescribed parent node -+# -+# @info: Block graph information starting at this node -+# -+# Since: 8.0 -+## -+{ 'struct': 'BlockChildInfo', -+ 'data': { -+ 'name': 'str', -+ 'info': 'BlockGraphInfo' -+ } } -+ -+## -+# @BlockGraphInfo: -+# -+# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo -+# data. -+# The base BlockNodeInfo struct contains the information for the (sub)graph's -+# root node. 
-+# -+# @children: Array of links to this node's child nodes' information -+# -+# Since: 8.0 -+## -+{ 'struct': 'BlockGraphInfo', -+ 'base': 'BlockNodeInfo', -+ 'data': { 'children': ['BlockChildInfo'] } } -+ - ## - # @ImageCheck: - # --- -2.31.1 - diff --git a/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch b/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch deleted file mode 100644 index e5c012a..0000000 --- a/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch +++ /dev/null @@ -1,197 +0,0 @@ -From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:58 +0200 -Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s) - -There is no real reason why bdrv_query_image_info() should generally not -recurse. The ImageInfo struct has a pointer to the backing image, so it -should generally be filled, unless the caller explicitly opts out. - -This moves the recursing code from bdrv_block_device_info() into -bdrv_query_image_info(). - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-7-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58) - -Conflicts: - block/qapi.c: Conflicts with - 54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide - redundant has_FOO in generated C"), which dropped - `has_backing_image`. Without that commit (and 44ea9d9be before it), - we still need to set `has_backing_image` in - `bdrv_query_image_info()`. 
- -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 94 +++++++++++++++++++++++++++----------------- - include/block/qapi.h | 2 + - 2 files changed, 59 insertions(+), 37 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index ad88bf9b38..5d0a8d2ce3 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - Error **errp) - { - ImageInfo **p_image_info; -+ ImageInfo *backing_info; - BlockDriverState *bs0, *backing; - BlockDeviceInfo *info; -+ ERRP_GUARD(); - - if (!bs->drv) { - error_setg(errp, "Block device %s is ejected", bs->node_name); -@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - bs0 = bs; - p_image_info = &info->image; - info->backing_file_depth = 0; -- while (1) { -- Error *local_err = NULL; -- bdrv_query_image_info(bs0, p_image_info, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- qapi_free_BlockDeviceInfo(info); -- return NULL; -- } -- -- /* stop gathering data for flat output */ -- if (flat) { -- break; -- } - -- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) { -- /* -- * Put any filtered child here (for backwards compatibility to when -- * we put bs0->backing here, which might be any filtered child). 
-- */ -- info->backing_file_depth++; -- bs0 = bdrv_filter_or_cow_bs(bs0); -- (*p_image_info)->has_backing_image = true; -- p_image_info = &((*p_image_info)->backing_image); -- } else { -- break; -- } -+ /* -+ * Skip automatically inserted nodes that the user isn't aware of for -+ * query-block (blk != NULL), but not for query-named-block-nodes -+ */ -+ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp); -+ if (*errp) { -+ qapi_free_BlockDeviceInfo(info); -+ return NULL; -+ } - -- /* Skip automatically inserted nodes that the user isn't aware of for -- * query-block (blk != NULL), but not for query-named-block-nodes */ -- if (blk) { -- bs0 = bdrv_skip_implicit_filters(bs0); -- } -+ backing_info = info->image->backing_image; -+ while (backing_info) { -+ info->backing_file_depth++; -+ backing_info = backing_info->backing_image; - } - - return info; -@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs, - * bdrv_query_image_info: - * @bs: block node to examine - * @p_info: location to store image information -+ * @flat: skip backing node information -+ * @skip_implicit_filters: skip implicit filters in the backing chain - * @errp: location to store error information - * -- * Store "flat" image information in @p_info. -+ * Store image information in @p_info, potentially recursively covering the -+ * backing chain. - * -- * "Flat" means it does *not* query backing image information, -- * i.e. (*pinfo)->has_backing_image will be set to false and -- * (*pinfo)->backing_image to NULL even when the image does in fact have -- * a backing image. -+ * If @flat is true, do not query backing image information, i.e. -+ * (*p_info)->has_backing_image will be set to false and -+ * (*p_info)->backing_image to NULL even when the image does in fact have a -+ * backing image. -+ * -+ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain -+ * will be skipped when querying backing image information. 
-+ * (@skip_implicit_filters is ignored when @flat is true.) - * - * @p_info will be set only on success. On error, store error in @errp. - */ - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, -+ bool flat, -+ bool skip_implicit_filters, - Error **errp) - { - ImageInfo *info; -@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs, - info = g_new0(ImageInfo, 1); - bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); - if (*errp) { -- qapi_free_ImageInfo(info); -- return; -+ goto fail; -+ } -+ -+ if (!flat) { -+ BlockDriverState *backing; -+ -+ /* -+ * Use any filtered child here (for backwards compatibility to when -+ * we always took bs->backing, which might be any filtered child). -+ */ -+ backing = bdrv_filter_or_cow_bs(bs); -+ if (skip_implicit_filters) { -+ backing = bdrv_skip_implicit_filters(backing); -+ } -+ -+ if (backing) { -+ bdrv_query_image_info(backing, &info->backing_image, false, -+ skip_implicit_filters, errp); -+ if (*errp) { -+ goto fail; -+ } -+ info->has_backing_image = true; -+ } - } - - *p_info = info; -+ return; -+ -+fail: -+ assert(*errp); -+ qapi_free_ImageInfo(info); - } - - /* @p_info will be set only on success. 
*/ -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 22198dcd0c..2174bf8fa2 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs, - Error **errp); - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, -+ bool flat, -+ bool skip_implicit_filters, - Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); --- -2.31.1 - diff --git a/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch b/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch deleted file mode 100644 index 8d5a20a..0000000 --- a/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch +++ /dev/null @@ -1,99 +0,0 @@ -From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 14 Feb 2023 18:16:21 +0100 -Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in - bdrv_append() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append() -RH-Bugzilla: 2168209 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s) - -bdrv_append() is called with bs_top AioContext held, but -bdrv_attach_child_noperm() could change the AioContext of bs_top. - -bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from -commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()"). -bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock -is taken, so let's temporarily hold the new AioContext to prevent QEMU -from failing in BDRV_POLL_WHILE when it tries to release the wrong -AioContext. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209 -Reported-by: Aihua Liang -Signed-off-by: Stefano Garzarella -Message-Id: <20230214171621.11574-1-sgarzare@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1) -Signed-off-by: Stefano Garzarella ---- - block.c | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/block.c b/block.c -index 0d78711416..9e1dcb9e47 100644 ---- a/block.c -+++ b/block.c -@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) - * child. - * - * This function does not create any image files. -+ * -+ * The caller must hold the AioContext lock for @bs_top. - */ - int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp) -@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - int ret; - BdrvChild *child; - Transaction *tran = tran_new(); -+ AioContext *old_context, *new_context = NULL; - - GLOBAL_STATE_CODE(); - - assert(!bs_new->backing); - -+ old_context = bdrv_get_aio_context(bs_top); -+ - child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", - &child_of_bds, bdrv_backing_role(bs_new), - tran, errp); -@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - goto out; - } - -+ /* -+ * bdrv_attach_child_noperm could change the AioContext of bs_top. -+ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily -+ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE -+ * that assumes the new lock is taken. 
-+ */ -+ new_context = bdrv_get_aio_context(bs_top); -+ -+ if (old_context != new_context) { -+ aio_context_release(old_context); -+ aio_context_acquire(new_context); -+ } -+ - ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); - if (ret < 0) { - goto out; -@@ -5306,6 +5324,11 @@ out: - - bdrv_refresh_limits(bs_top, NULL, NULL); - -+ if (new_context && old_context != new_context) { -+ aio_context_release(new_context); -+ aio_context_acquire(old_context); -+ } -+ - return ret; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch b/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch deleted file mode 100644 index 6b8f6a7..0000000 --- a/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch +++ /dev/null @@ -1,140 +0,0 @@ -From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:55 +0200 -Subject: [PATCH 08/20] block/vmdk: Change extent info type - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s) - -VMDK's implementation of .bdrv_get_specific_info() returns information -about its extent files, ostensibly in the form of ImageInfo objects. -However, it does not get this information through -bdrv_query_image_info(), but fills only a select few fields with custom -information that does not always match the fields' purposes. - -For example, @format, which is supposed to be a block driver name, is -filled with the extent type, e.g. SPARSE or FLAT. - -In ImageInfo, @compressed shows whether the data that can be seen in the -image is stored in compressed form or not. For example, a compressed -qcow2 image will store compressed data in its data file, but when -accessing the qcow2 node, you will see normal data. 
This is not how -VMDK uses the @compressed field for its extent files: Instead, it -signifies whether accessing the extent file will yield compressed data -(which the VMDK driver then (de-)compresses). - -Create a new structure to represent the extent information. This allows -us to clarify the fields' meanings, and it clearly shows that these are -not complete ImageInfo objects. (That is, if a user wants an extent -file's ImageInfo object, they will need to query it separately, and will -not get it from ImageInfoSpecificVmdk.extents.) - -Note that this removes the last use of ['ImageInfo'] (i.e. an array of -ImageInfo objects), so the QAPI generator will no longer generate -ImageInfoList by default. However, we use it in qemu-img.c, so we need -to create a dummy object to force the generate to create that type, -similarly to DummyForceArrays in machine.json (introduced in commit -9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array -types")). - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-4-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05) -Signed-off-by: Hanna Czenczek ---- - block/vmdk.c | 8 ++++---- - qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++- - 2 files changed, 41 insertions(+), 5 deletions(-) - -diff --git a/block/vmdk.c b/block/vmdk.c -index 26376352b9..4435b9880b 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs) - return 1; - } - --static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) -+static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent) - { -- ImageInfo *info = g_new0(ImageInfo, 1); -+ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1); - - bdrv_refresh_filename(extent->file->bs); -- *info = (ImageInfo){ -+ *info = (VmdkExtentInfo){ - .filename = g_strdup(extent->file->bs->filename), - .format = g_strdup(extent->type), - 
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE, -@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs, - int i; - BDRVVmdkState *s = bs->opaque; - ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); -- ImageInfoList **tail; -+ VmdkExtentInfoList **tail; - - *spec_info = (ImageInfoSpecific){ - .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index f5d822cbd6..4b9365167f 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -124,7 +124,33 @@ - 'create-type': 'str', - 'cid': 'int', - 'parent-cid': 'int', -- 'extents': ['ImageInfo'] -+ 'extents': ['VmdkExtentInfo'] -+ } } -+ -+## -+# @VmdkExtentInfo: -+# -+# Information about a VMDK extent file -+# -+# @filename: Name of the extent file -+# -+# @format: Extent type (e.g. FLAT or SPARSE) -+# -+# @virtual-size: Number of bytes covered by this extent -+# -+# @cluster-size: Cluster size in bytes (for non-flat extents) -+# -+# @compressed: Whether this extent contains compressed data -+# -+# Since: 8.0 -+## -+{ 'struct': 'VmdkExtentInfo', -+ 'data': { -+ 'filename': 'str', -+ 'format': 'str', -+ 'virtual-size': 'int', -+ '*cluster-size': 'int', -+ '*compressed': 'bool' - } } - - ## -@@ -5754,3 +5780,13 @@ - 'data': { 'device': 'str', '*id': 'str', '*name': 'str'}, - 'returns': 'SnapshotInfo', - 'allow-preconfig': true } -+ -+## -+# @DummyBlockCoreForceArrays: -+# -+# Not used by QMP; hack to let us use ImageInfoList internally -+# -+# Since: 8.0 -+## -+{ 'struct': 'DummyBlockCoreForceArrays', -+ 'data': { 'unused-image-info': ['ImageInfo'] } } --- -2.31.1 - diff --git a/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch b/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch new file mode 100644 index 0000000..4173648 --- /dev/null +++ b/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch @@ -0,0 +1,55 @@ +From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001 
+From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit ef56ffbdd6b0605dc1e305611287b948c970e236 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:08 2023 -0400 + + checkpatch: add qemu_bh_new/aio_bh_new checks + + Advise authors to use the _guarded versions of the APIs, instead. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-4-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + scripts/checkpatch.pl | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl +index d768171dcf..eeaec436eb 100755 +--- a/scripts/checkpatch.pl ++++ b/scripts/checkpatch.pl +@@ -2865,6 +2865,14 @@ sub process { + if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { + ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr); + } ++# recommend qemu_bh_new_guarded instead of qemu_bh_new ++ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { ++ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); ++ } ++# recommend aio_bh_new_guarded instead of aio_bh_new ++ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { ++ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); ++ } + # check for module_init(), use category-specific init macros explicitly please + if ($line =~ /^module_init\s*\(/) { + ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . 
$herecurr); +-- +2.39.3 + diff --git a/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch b/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch deleted file mode 100644 index 1a3c139..0000000 --- a/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch +++ /dev/null @@ -1,127 +0,0 @@ -From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:17 -0500 -Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel() - race - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm) - -dma_blk_cb() only takes the AioContext lock around ->io_func(). That -means the rest of dma_blk_cb() is not protected. In particular, the -DMAAIOCB field accesses happen outside the lock. - -There is a race when the main loop thread holds the AioContext lock and -invokes scsi_device_purge_requests() -> bdrv_aio_cancel() -> -dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb -field determines how cancellation proceeds. If dma_aio_cancel() sees -dbs->acb == NULL while dma_blk_cb() is still running, the request can be -completed twice (-ECANCELED and the actual return value). - -The following assertion can occur with virtio-scsi when an IOThread is -used: - - ../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed. - -Fix the race by holding the AioContext across dma_blk_cb(). Now -dma_aio_cancel() under the AioContext lock will not see -inconsistent/intermediate states. 
- -Cc: Paolo Bonzini -Reviewed-by: Eric Blake -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-3-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/scsi-disk.c | 4 +--- - softmmu/dma-helpers.c | 12 +++++++----- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 5327f93f4c..b12d8b0816 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -354,13 +354,12 @@ done: - scsi_req_unref(&r->req); - } - -+/* Called with AioContext lock held */ - static void scsi_dma_complete(void *opaque, int ret) - { - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - } - scsi_dma_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) -diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c -index 7820fec54c..2463964805 100644 ---- a/softmmu/dma-helpers.c -+++ b/softmmu/dma-helpers.c -@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret) - static void dma_blk_cb(void *opaque, int ret) - { - DMAAIOCB *dbs = (DMAAIOCB *)opaque; -+ AioContext *ctx = dbs->ctx; - dma_addr_t cur_addr, cur_len; - void *mem; - - trace_dma_blk_cb(dbs, ret); - -+ aio_context_acquire(ctx); - dbs->acb = NULL; - dbs->offset += dbs->iov.size; - - if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { - dma_complete(dbs, ret); -- return; -+ goto out; - } - dma_blk_unmap(dbs); - -@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret) - - if (dbs->iov.size == 0) { - 
trace_dma_map_wait(dbs); -- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); -+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); - cpu_register_map_client(dbs->bh); -- return; -+ goto out; - } - - if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { -@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret) - QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); - } - -- aio_context_acquire(dbs->ctx); - dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, - dma_blk_cb, dbs, dbs->io_func_opaque); -- aio_context_release(dbs->ctx); - assert(dbs->acb); -+out: -+ aio_context_release(ctx); - } - - static void dma_aio_cancel(BlockAIOCB *acb) --- -2.39.1 - diff --git a/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch b/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch deleted file mode 100644 index dd77648..0000000 --- a/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 07/12] edu: add smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a -Author: Paolo Bonzini -Date: Thu Mar 2 11:16:13 2023 +0100 - - edu: add smp_mb__after_rmw() - - Ensure ordering between clearing the COMPUTING flag and checking - IRQFACT, and between setting the IRQFACT flag and checking - COMPUTING. This ensures that no wakeups are lost. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - hw/misc/edu.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/misc/edu.c b/hw/misc/edu.c -index e935c418d4..a1f8bc77e7 100644 ---- a/hw/misc/edu.c -+++ b/hw/misc/edu.c -@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val, - case 0x20: - if (val & EDU_STATUS_IRQFACT) { - qatomic_or(&edu->status, EDU_STATUS_IRQFACT); -+ /* Order check of the COMPUTING flag after setting IRQFACT. */ -+ smp_mb__after_rmw(); - } else { - qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT); - } -@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque) - qemu_mutex_unlock(&edu->thr_mutex); - qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING); - -+ /* Clear COMPUTING flag before checking IRQFACT. */ -+ smp_mb__after_rmw(); -+ - if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { - qemu_mutex_lock_iothread(); - edu_raise_irq(edu, FACT_IRQ); --- -2.39.1 - diff --git a/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch b/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch new file mode 100644 index 0000000..77086e5 --- /dev/null +++ b/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch @@ -0,0 +1,153 @@ +From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:32 +0200 +Subject: [PATCH 02/21] graph-lock: Disable locking for now + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm) + +In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They +come from callers that hold an AioContext lock, which is not allowed +during polling. 
In theory, we could temporarily release the lock, but +callers are inconsistent about whether they hold a lock, and if they do, +some are also confused about which one they hold. While all of this is +fixable, it's not trivial, and the best course of action for 8.0.1 is +probably just disabling the graph locking code temporarily. + +We don't currently rely on graph locking yet. It is supposed to replace +the AioContext lock eventually to enable multiqueue support, but as long +as we still have the AioContext lock, it is sufficient without the graph +lock. Once the AioContext lock goes away, the deadlock doesn't exist any +more either and this commit can be reverted. (Of course, it can also be +reverted while the AioContext lock still exists if the callers have been +fixed.) + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-2-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d) +Signed-off-by: Kevin Wolf +--- + block/graph-lock.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 259a7a0bde..2490926c90 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; + /* Protects the list of aiocontext and orphaned_reader_count */ + static QemuMutex aio_context_list_lock; + ++#if 0 + /* Written and read with atomic operations. */ + static int has_writer; ++#endif + + /* + * A reader coroutine could move from an AioContext to another. 
+@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) + g_free(ctx->bdrv_graph); + } + ++#if 0 + static uint32_t reader_count(void) + { + BdrvGraphRWlock *brdv_graph; +@@ -105,10 +108,17 @@ static uint32_t reader_count(void) + assert((int32_t)rd >= 0); + return rd; + } ++#endif + + void bdrv_graph_wrlock(void) + { + GLOBAL_STATE_CODE(); ++ /* ++ * TODO Some callers hold an AioContext lock when this is called, which ++ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or ++ * AioContext locks are gone). ++ */ ++#if 0 + assert(!qatomic_read(&has_writer)); + + /* Make sure that constantly arriving new I/O doesn't cause starvation */ +@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) + } while (reader_count() >= 1); + + bdrv_drain_all_end(); ++#endif + } + + void bdrv_graph_wrunlock(void) + { + GLOBAL_STATE_CODE(); ++#if 0 + QEMU_LOCK_GUARD(&aio_context_list_lock); + assert(qatomic_read(&has_writer)); + +@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void) + + /* Wake up all coroutine that are waiting to read the graph */ + qemu_co_enter_all(&reader_queue, &aio_context_list_lock); ++#endif + } + + void coroutine_fn bdrv_graph_co_rdlock(void) + { ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + +@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void) + qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); + } + } ++#endif + } + + void coroutine_fn bdrv_graph_co_rdunlock(void) + { ++#if 0 + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + +@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) + if (qatomic_read(&has_writer)) { + aio_wait_kick(); + } ++#endif + } + + void bdrv_graph_rdlock_main_loop(void) +@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void) + void assert_bdrv_graph_readable(void) + { + /* reader_count() is slow due to aio_context_list_lock lock 
contention */ ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + #ifdef CONFIG_DEBUG_GRAPH_LOCK + assert(qemu_in_main_thread() || reader_count()); + #endif ++#endif + } + + void assert_bdrv_graph_writable(void) + { + assert(qemu_in_main_thread()); ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + assert(qatomic_read(&has_writer)); ++#endif + } +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch b/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch new file mode 100644 index 0000000..67e702c --- /dev/null +++ b/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch @@ -0,0 +1,40 @@ +From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001 +From: Ani Sinha +Date: Tue, 2 May 2023 15:51:53 +0530 +Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines + version 7.6 and above + +RH-Author: Ani Sinha +RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 +RH-Bugzilla: 1934134 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm) + +Please look at QEMU upstream commit +1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3") +This patch adapts the above change so that it applies to RHEL pc machines of +version 7.6 and newer. These are the machine types that are currently supported +in RHEL. Q35 machines are not affected. 
+ +Signed-off-by: Ani Sinha +--- + hw/i386/pc_piix.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 4d5880e249..6c7be628e1 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; ++ pcmc->resizable_acpi_blob = true; + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; +-- +2.39.1 + diff --git a/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch b/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch new file mode 100644 index 0000000..e06113a --- /dev/null +++ b/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch @@ -0,0 +1,101 @@ +From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001 +From: Ani Sinha +Date: Wed, 29 Mar 2023 10:27:26 +0530 +Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines + older than version 2.3 + +RH-Author: Ani Sinha +RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 +RH-Bugzilla: 1934134 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm) + +i440fx machine versions 2.3 and newer supports dynamic ram +resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") . +Currently supported all q35 machine types (versions 2.4 and newer) supports +resizable RAM/ROM blocks.Therefore the warning generated when the ACPI table +size exceeds a pre-defined value does not apply to those machine versions. +Add a check limiting the warning message to only those machines that does not +support expandable ram blocks (that is, i440fx machines with version 2.2 +and older). 
+ +Signed-off-by: Ani Sinha +Message-Id: <20230329045726.14028-1-anisinha@redhat.com> +Reviewed-by: Igor Mammedov +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074) +--- + hw/i386/acpi-build.c | 6 ++++-- + hw/i386/pc.c | 1 + + hw/i386/pc_piix.c | 1 + + include/hw/i386/pc.h | 3 +++ + 4 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index ec857a117e..9bc4d8a981 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + int legacy_table_size = + ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, + ACPI_BUILD_ALIGN_SIZE); +- if (tables_blob->len > legacy_table_size) { ++ if ((tables_blob->len > legacy_table_size) && ++ !pcmc->resizable_acpi_blob) { + /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", +@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + g_array_set_size(tables_blob, legacy_table_size); + } else { + /* Make sure we have a buffer in case we need to resize the tables. */ +- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { ++ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) && ++ !pcmc->resizable_acpi_blob) { + /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. 
*/ + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index f216922cee..7db5a2348f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->acpi_data_size = 0x20000 + 0x8000; + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; ++ pcmc->resizable_acpi_blob = true; + assert(!mc->get_hotplug_handler); + mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index fc704d783f..4d5880e249 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len); + compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len); + pcmc->rsdp_in_ram = false; ++ pcmc->resizable_acpi_blob = false; + } + + DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index d218ad1628..2f514d13d8 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -130,6 +130,9 @@ struct PCMachineClass { + + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; ++ ++ /* resizable acpi blob compat */ ++ bool resizable_acpi_blob; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +-- +2.39.1 + diff --git a/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch b/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch new file mode 100644 index 0000000..e96bb10 --- /dev/null +++ b/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch @@ -0,0 +1,60 @@ +From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary + 
+RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 + +There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'. +Both of them are required to follow cluster-NUMA-node boundary. To +enable the validation to warn about the irregular configuration where +multiple CPUs in one cluster have been associated with different NUMA +nodes. + +Signed-off-by: Gavin Shan +Acked-by: Igor Mammedov +Message-Id: <20230509002739.18388-3-gshan@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb) +Signed-off-by: Gavin Shan +--- + hw/arm/sbsa-ref.c | 2 ++ + hw/arm/virt.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c +index 0b93558dde..efb380e7c8 100644 +--- a/hw/arm/sbsa-ref.c ++++ b/hw/arm/sbsa-ref.c +@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) + mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; + mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; + mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + } + + static const TypeInfo sbsa_ref_info = { +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 9be53e9355..df6a0231bc 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, 
"acpi", "OnOffAuto", +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch b/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch new file mode 100644 index 0000000..3bbe93f --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch @@ -0,0 +1,166 @@ +From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Tue, 25 Jul 2023 10:56:51 +0100 +Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes +RH-Bugzilla: 2229133 +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu +RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 + +The implementation of the SMMUv3 has multiple places where it reads a +data structure from the guest and directly operates on it without +doing a guest-to-host endianness conversion. Since all SMMU data +structures are little-endian, this means that the SMMU doesn't work +on a big-endian host. In particular, this causes the Avocado test + machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max +to fail on an s390x host. + +Add appropriate byte-swapping on reads and writes of guest in-memory +data structures so that the device works correctly on big-endian +hosts. + +As part of this we constrain queue_read() to operate only on Cmd +structs and queue_write() on Evt structs, because in practice these +are the only data structures the two functions are used with, and we +need to know what the data structure is to be able to byte-swap its +parts correctly. 
+ +Signed-off-by: Peter Maydell +Tested-by: Thomas Huth +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Auger +Message-id: 20230717132641.764660-1-peter.maydell@linaro.org +Cc: qemu-stable@nongnu.org +(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3) +Signed-off-by: Eric Auger +--- + hw/arm/smmu-common.c | 3 +-- + hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++-------- + 2 files changed, 32 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index e7f1c1f219..daa02ce798 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte, + dma_addr_t addr = baseaddr + index * sizeof(*pte); + + /* TODO: guarantee 64-bit single-copy atomicity */ +- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte), +- MEMTXATTRS_UNSPECIFIED); ++ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED); + + if (ret != MEMTX_OK) { + info->type = SMMU_PTW_ERR_WALK_EABT; +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 270c80b665..cfb56725a6 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn) + trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn); + } + +-static inline MemTxResult queue_read(SMMUQueue *q, void *data) ++static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd) + { + dma_addr_t addr = Q_CONS_ENTRY(q); ++ MemTxResult ret; ++ int i; + +- return dma_memory_read(&address_space_memory, addr, data, q->entry_size, +- MEMTXATTRS_UNSPECIFIED); ++ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd), ++ MEMTXATTRS_UNSPECIFIED); ++ if (ret != MEMTX_OK) { ++ return ret; ++ } ++ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) { ++ le32_to_cpus(&cmd->word[i]); ++ } ++ return ret; + } + +-static MemTxResult queue_write(SMMUQueue *q, void *data) ++static MemTxResult 
queue_write(SMMUQueue *q, Evt *evt_in) + { + dma_addr_t addr = Q_PROD_ENTRY(q); + MemTxResult ret; ++ Evt evt = *evt_in; ++ int i; + +- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size, ++ for (i = 0; i < ARRAY_SIZE(evt.word); i++) { ++ cpu_to_le32s(&evt.word[i]); ++ } ++ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt), + MEMTXATTRS_UNSPECIFIED); + if (ret != MEMTX_OK) { + return ret; +@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s) + static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, + SMMUEventInfo *event) + { +- int ret; ++ int ret, i; + + trace_smmuv3_get_ste(addr); + /* TODO: guarantee 64-bit single-copy atomicity */ +@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, + event->u.f_ste_fetch.addr = addr; + return -EINVAL; + } ++ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { ++ le32_to_cpus(&buf->word[i]); ++ } + return 0; + + } +@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, + CD *buf, SMMUEventInfo *event) + { + dma_addr_t addr = STE_CTXPTR(ste); +- int ret; ++ int ret, i; + + trace_smmuv3_get_cd(addr); + /* TODO: guarantee 64-bit single-copy atomicity */ +@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, + event->u.f_ste_fetch.addr = addr; + return -EINVAL; + } ++ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { ++ le32_to_cpus(&buf->word[i]); ++ } + return 0; + } + +@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, + return -EINVAL; + } + if (s->features & SMMU_FEATURE_2LVL_STE) { +- int l1_ste_offset, l2_ste_offset, max_l2_ste, span; ++ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i; + dma_addr_t l1ptr, l2ptr; + STEDesc l1std; + +@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, + event->u.f_ste_fetch.addr = l1ptr; + return -EINVAL; + } ++ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) { ++ 
le32_to_cpus(&l1std.word[i]); ++ } + + span = L1STD_SPAN(&l1std); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch b/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch deleted file mode 100644 index bc65e2f..0000000 --- a/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -After the improvement to high memory region address assignment is -applied, the memory layout can be changed, introducing possible -migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region -is disabled or enabled when the optimization is applied or not, with -the following configuration. The configuration is only achievable by -modifying the source code until more properties are added to allow -users selectively disable those high memory regions. 
- - pa_bits = 40; - vms->highmem_redists = false; - vms->highmem_ecam = false; - vms->highmem_mmio = true; - - # qemu-system-aarch64 -accel kvm -cpu host \ - -machine virt-7.2,compact-highmem={on, off} \ - -m 4G,maxmem=511G -monitor stdio - - Region compact-highmem=off compact-highmem=on - ---------------------------------------------------------------- - MEM [1GB 512GB] [1GB 512GB] - HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled] - HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled] - HIGH_PCIE_MMIO [disabled] [512GB 1TB] - -In order to keep backwords compatibility, we need to disable the -optimization on machine, which is virt-7.1 or ealier than it. It -means the optimization is enabled by default from virt-7.2. Besides, -'compact-highmem' property is added so that the optimization can be -explicitly enabled or disabled on all machine types by users. - -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-7-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0) -Signed-off-by: Gavin Shan -Conflicts: - hw/arm/virt.c - Comment out the handlers of property 'compact-highmem' since - the property isn't exposed. ---- - docs/system/arm/virt.rst | 4 ++++ - hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++ - include/hw/arm/virt.h | 1 + - 3 files changed, 39 insertions(+) - -diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst -index 20442ea2c1..4454706392 100644 ---- a/docs/system/arm/virt.rst -+++ b/docs/system/arm/virt.rst -@@ -94,6 +94,10 @@ highmem - address space above 32 bits. The default is ``on`` for machine types - later than ``virt-2.12``. - -+compact-highmem -+ Set ``on``/``off`` to enable/disable the compact layout for high memory regions. -+ The default is ``on`` for machine types later than ``virt-7.2``. 
-+ - gic-version - Specify the version of the Generic Interrupt Controller (GIC) to provide. - Valid values are: -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6896e0ca0f..6087511ae9 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = { - * Note the extended_memmap is sized so that it eventually also includes the - * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last - * index of base_memmap). -+ * -+ * The memory map for these Highmem IO Regions can be in legacy or compact -+ * layout, depending on 'compact-highmem' property. With legacy layout, the -+ * PA space for one specific region is always reserved, even if the region -+ * has been disabled or doesn't fit into the PA space. However, the PA space -+ * for the region won't be reserved in these circumstances with compact layout. - */ - static MemMapEntry extended_memmap[] = { - /* Additional 64 MB redist region (can contain up to 512 redistributors) */ -@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) - vms->highmem = value; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ -+static bool virt_get_compact_highmem(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_compact; -+} -+ -+static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_compact = value; -+} -+#endif /* disabled for RHEL */ -+ - static bool virt_get_its(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable using " - "physical address space above 32 bits"); - -+ object_class_property_add_bool(oc, "compact-highmem", -+ virt_get_compact_highmem, -+ virt_set_compact_highmem); -+ object_class_property_set_description(oc, 
"compact-highmem", -+ "Set on/off to enable/disable compact " -+ "layout for high memory regions"); -+ - object_class_property_add_str(oc, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_class_property_set_description(oc, "gic-version", -@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj) - - /* High memory is enabled by default */ - vms->highmem = true; -+ vms->highmem_compact = !vmc->no_highmem_compact; - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - - vms->highmem_ecam = !vmc->no_highmem_ecam; -@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2) - - static void virt_machine_7_1_options(MachineClass *mc) - { -+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -+ - virt_machine_7_2_options(mc); - compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len); -+ /* Compact layout for high memory regions was introduced with 7.2 */ -+ vmc->no_highmem_compact = true; - } - DEFINE_VIRT_MACHINE(7, 1) - -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 15bd291311..85e7d61868 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -125,6 +125,7 @@ struct VirtMachineClass { - bool no_pmu; - bool claim_edge_triggered_timers; - bool smbios_old_sys_ver; -+ bool no_highmem_compact; - bool no_highmem_ecam; - bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */ - bool kvm_no_adjvtime; --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch deleted file mode 100644 index df691a7..0000000 --- a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory - regions - -RH-Author: Gavin Shan -RH-MergeRequest: 126: 
hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -The 3 high memory regions are usually enabled by default, but they may -be not used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2. -This leads to waste in the PA space. - -Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to -allow users selectively disable them if needed. After that, the high -memory region for GICv3 or GICv4 redistributor can be disabled by user, -the number of maximal supported CPUs needs to be calculated based on -'vms->highmem_redists'. The follow-up error message is also improved -to indicate if the high memory region for GICv3 and GICv4 has been -enabled or not. - -Suggested-by: Marc Zyngier -Signed-off-by: Gavin Shan -Reviewed-by: Marc Zyngier -Reviewed-by: Cornelia Huck -Reviewed-by: Eric Auger -Message-id: 20221029224307.138822-8-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368) -Signed-off-by: Gavin Shan -Conflicts: - hw/arm/virt.c - Comment out the handlers of the property 'highmem-redists', - 'highmem-ecam' and 'highmem-mmio' since they aren't exposed. ---- - docs/system/arm/virt.rst | 13 +++++++ - hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++-- - 2 files changed, 86 insertions(+), 2 deletions(-) - -diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst -index 4454706392..188a4f211f 100644 ---- a/docs/system/arm/virt.rst -+++ b/docs/system/arm/virt.rst -@@ -98,6 +98,19 @@ compact-highmem - Set ``on``/``off`` to enable/disable the compact layout for high memory regions. - The default is ``on`` for machine types later than ``virt-7.2``. 
- -+highmem-redists -+ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or -+ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will -+ limit the maximum number of CPUs when GICv3 or GICv4 is used. -+ -+highmem-ecam -+ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM. -+ The default is ``on`` for machine types later than ``virt-3.0``. -+ -+highmem-mmio -+ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO. -+ The default is ``on``. -+ - gic-version - Specify the version of the Generic Interrupt Controller (GIC) to provide. - Valid values are: -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6087511ae9..304fa0d6e7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine) - if (vms->gic_version == VIRT_GIC_VERSION_2) { - virt_max_cpus = GIC_NCPU; - } else { -- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) + -- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); -+ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST); -+ if (vms->highmem_redists) { -+ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); -+ } - } - - if (max_cpus > virt_max_cpus) { - error_report("Number of SMP CPUs requested (%d) exceeds max CPUs " - "supported by machine 'mach-virt' (%d)", - max_cpus, virt_max_cpus); -+ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) { -+ error_printf("Try 'highmem-redists=on' for more CPUs\n"); -+ } -+ - exit(1); - } - -@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) - - vms->highmem_compact = value; - } -+ -+static bool virt_get_highmem_redists(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_redists; -+} -+ -+static void virt_set_highmem_redists(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_redists = 
value; -+} -+ -+static bool virt_get_highmem_ecam(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_ecam; -+} -+ -+static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_ecam = value; -+} -+ -+static bool virt_get_highmem_mmio(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_mmio; -+} -+ -+static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_mmio = value; -+} -+ - #endif /* disabled for RHEL */ - - static bool virt_get_its(Object *obj, Error **errp) -@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable compact " - "layout for high memory regions"); - -+ object_class_property_add_bool(oc, "highmem-redists", -+ virt_get_highmem_redists, -+ virt_set_highmem_redists); -+ object_class_property_set_description(oc, "highmem-redists", -+ "Set on/off to enable/disable high " -+ "memory region for GICv3 or GICv4 " -+ "redistributor"); -+ -+ object_class_property_add_bool(oc, "highmem-ecam", -+ virt_get_highmem_ecam, -+ virt_set_highmem_ecam); -+ object_class_property_set_description(oc, "highmem-ecam", -+ "Set on/off to enable/disable high " -+ "memory region for PCI ECAM"); -+ -+ object_class_property_add_bool(oc, "highmem-mmio", -+ virt_get_highmem_mmio, -+ virt_set_highmem_mmio); -+ object_class_property_set_description(oc, "highmem-mmio", -+ "Set on/off to enable/disable high " -+ "memory region for PCI MMIO"); -+ - object_class_property_add_str(oc, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_class_property_set_description(oc, "gic-version", --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch 
b/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch deleted file mode 100644 index 6b20bb8..0000000 --- a/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address - assignment for 9.2.0 machine - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/8] beda1791c0c35dce5c669efd47685302b8468032 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 -Upstream: RHEL only - -The compact high memory region address assignment is enabled for 9.2.0, -but it's kept as disabled for 9.0.0, to keep the backwards compatibility -on 9.0.0. Note that these newly added properties ('compact-highmem', -'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream -aren't exposed for the downstream. 
- -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 304fa0d6e7..e41c0b462c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj) - - /* High memory is enabled by default */ - vms->highmem = true; -+ vms->highmem_compact = !vmc->no_highmem_compact; - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - - vms->highmem_ecam = !vmc->no_highmem_ecam; -@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc) - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; -+ /* Compact layout for high memory regions was introduced with 9.2.0 */ -+ vmc->no_highmem_compact = true; - } - DEFINE_RHEL_MACHINE(9, 0, 0) --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch b/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch deleted file mode 100644 index 9dcdf61..0000000 --- a/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address - assignment - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -There are three high memory regions, which are VIRT_HIGH_REDIST2, -VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses -are floating on highest RAM address. However, they can be disabled -in several cases. 
- -(1) One specific high memory region is likely to be disabled by - code by toggling vms->highmem_{redists, ecam, mmio}. - -(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is - 'virt-2.12' or ealier than it. - -(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded - on 32-bits system. - -(4) One specific high memory region is disabled when it breaks the - PA space limit. - -The current implementation of virt_set_{memmap, high_memmap}() isn't -optimized because the high memory region's PA space is always reserved, -regardless of whatever the actual state in the corresponding -vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and -'vms->highest_gpa' are always increased for case (1), (2) and (3). -It's unnecessary since the assigned PA space for the disabled high -memory region won't be used afterwards. - -Improve the address assignment for those three high memory region by -skipping the address assignment for one specific high memory region if -it has been disabled in case (1), (2) and (3). The memory layout may -be changed after the improvement is applied, which leads to potential -migration breakage. So 'vms->highmem_compact' is added to control if -the improvement should be applied. For now, 'vms->highmem_compact' is -set to false, meaning that we don't have memory layout change until it -becomes configurable through property 'compact-highmem' in next patch. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-6-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 15 ++++++++++----- - include/hw/arm/virt.h | 1 + - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6e3b9fc060..6896e0ca0f 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms, - vms->memmap[i].size = region_size; - - /* -- * Check each device to see if they fit in the PA space, -- * moving highest_gpa as we go. -+ * Check each device to see if it fits in the PA space, -+ * moving highest_gpa as we go. For compatibility, move -+ * highest_gpa for disabled fitting devices as well, if -+ * the compact layout has been disabled. - * - * For each device that doesn't fit, disable it. 
- */ - fits = (region_base + region_size) <= BIT_ULL(pa_bits); -- if (fits) { -- vms->highest_gpa = region_base + region_size - 1; -+ *region_enabled &= fits; -+ if (vms->highmem_compact && !*region_enabled) { -+ continue; - } - -- *region_enabled &= fits; - base = region_base + region_size; -+ if (fits) { -+ vms->highest_gpa = base - 1; -+ } - } - } - -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 22b54ec510..15bd291311 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -144,6 +144,7 @@ struct VirtMachineState { - PFlashCFI01 *flash[2]; - bool secure; - bool highmem; -+ bool highmem_compact; - bool highmem_ecam; - bool highmem_mmio; - bool highmem_redists; --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch b/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch deleted file mode 100644 index ea9cb1f..0000000 --- a/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in - virt_set_high_memmap() - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces variable 'region_base' for the base address of the -specific high memory region. It's the preparatory work to optimize -high memory region address assignment. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-4-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ca098d40b8..ddcf7ee2f8 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { -- hwaddr region_size; -+ hwaddr region_base, region_size; - bool fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ region_base = ROUND_UP(base, extended_memmap[i].size); - region_size = extended_memmap[i].size; - -- base = ROUND_UP(base, region_size); -- vms->memmap[i].base = base; -+ vms->memmap[i].base = region_base; - vms->memmap[i].size = region_size; - - /* -@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, - * - * For each device that doesn't fit, disable it. 
- */ -- fits = (base + region_size) <= BIT_ULL(pa_bits); -+ fits = (region_base + region_size) <= BIT_ULL(pa_bits); - if (fits) { -- vms->highest_gpa = base + region_size - 1; -+ vms->highest_gpa = region_base + region_size - 1; - } - - switch (i) { -@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - break; - } - -- base += region_size; -+ base = region_base + region_size; - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch b/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch deleted file mode 100644 index 659faeb..0000000 --- a/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch +++ /dev/null @@ -1,95 +0,0 @@ -From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled() - helper - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces virt_get_high_memmap_enabled() helper, which returns -the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will -be used in the subsequent patches. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-5-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 32 +++++++++++++++++++------------- - 1 file changed, 19 insertions(+), 13 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ddcf7ee2f8..6e3b9fc060 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - return arm_cpu_mp_affinity(idx, clustersz); - } - -+static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, -+ int index) -+{ -+ bool *enabled_array[] = { -+ &vms->highmem_redists, -+ &vms->highmem_ecam, -+ &vms->highmem_mmio, -+ }; -+ -+ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == -+ ARRAY_SIZE(enabled_array)); -+ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array)); -+ -+ return enabled_array[index - VIRT_LOWMEMMAP_LAST]; -+} -+ - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { - hwaddr region_base, region_size; -- bool fits; -+ bool *region_enabled, fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ region_enabled = virt_get_high_memmap_enabled(vms, i); - region_base = ROUND_UP(base, extended_memmap[i].size); - region_size = extended_memmap[i].size; - -@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - vms->highest_gpa = region_base + region_size - 1; - } - -- switch (i) { -- case VIRT_HIGH_GIC_REDIST2: -- vms->highmem_redists &= fits; -- break; -- case VIRT_HIGH_PCIE_ECAM: -- vms->highmem_ecam &= fits; -- break; -- case VIRT_HIGH_PCIE_MMIO: -- vms->highmem_mmio &= fits; -- break; -- } -- -+ *region_enabled &= fits; - base = region_base + region_size; - } - } --- -2.31.1 - 
diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch b/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch deleted file mode 100644 index f55c06a..0000000 --- a/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch +++ /dev/null @@ -1,130 +0,0 @@ -From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces virt_set_high_memmap() helper. The logic of high -memory region address assignment is moved to the helper. The intention -is to make the subsequent optimization for high memory region address -assignment easier. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-2-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 74 ++++++++++++++++++++++++++++----------------------- - 1 file changed, 41 insertions(+), 33 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bf18838b87..bea5f54720 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - return arm_cpu_mp_affinity(idx, clustersz); - } - -+static void virt_set_high_memmap(VirtMachineState *vms, -+ hwaddr base, int pa_bits) -+{ -+ int i; -+ -+ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ hwaddr size = extended_memmap[i].size; -+ bool fits; -+ -+ base = ROUND_UP(base, size); -+ vms->memmap[i].base = base; -+ vms->memmap[i].size = size; -+ -+ /* -+ * Check each device to see if they fit in the PA space, -+ * moving highest_gpa as we go. -+ * -+ * For each device that doesn't fit, disable it. 
-+ */ -+ fits = (base + size) <= BIT_ULL(pa_bits); -+ if (fits) { -+ vms->highest_gpa = base + size - 1; -+ } -+ -+ switch (i) { -+ case VIRT_HIGH_GIC_REDIST2: -+ vms->highmem_redists &= fits; -+ break; -+ case VIRT_HIGH_PCIE_ECAM: -+ vms->highmem_ecam &= fits; -+ break; -+ case VIRT_HIGH_PCIE_MMIO: -+ vms->highmem_mmio &= fits; -+ break; -+ } -+ -+ base += size; -+ } -+} -+ - static void virt_set_memmap(VirtMachineState *vms, int pa_bits) - { - MachineState *ms = MACHINE(vms); -@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) - /* We know for sure that at least the memory fits in the PA space */ - vms->highest_gpa = memtop - 1; - -- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -- hwaddr size = extended_memmap[i].size; -- bool fits; -- -- base = ROUND_UP(base, size); -- vms->memmap[i].base = base; -- vms->memmap[i].size = size; -- -- /* -- * Check each device to see if they fit in the PA space, -- * moving highest_gpa as we go. -- * -- * For each device that doesn't fit, disable it. 
-- */ -- fits = (base + size) <= BIT_ULL(pa_bits); -- if (fits) { -- vms->highest_gpa = base + size - 1; -- } -- -- switch (i) { -- case VIRT_HIGH_GIC_REDIST2: -- vms->highmem_redists &= fits; -- break; -- case VIRT_HIGH_PCIE_ECAM: -- vms->highmem_ecam &= fits; -- break; -- case VIRT_HIGH_PCIE_MMIO: -- vms->highmem_mmio &= fits; -- break; -- } -- -- base += size; -- } -+ virt_set_high_memmap(vms, base, pa_bits); - - if (device_memory_size > 0) { - ms->device_memory = g_malloc0(sizeof(*ms->device_memory)); --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch b/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch deleted file mode 100644 index 27bc6bb..0000000 --- a/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch +++ /dev/null @@ -1,83 +0,0 @@ -From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in - virt_set_high_memmap() - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This renames variable 'size' to 'region_size' in virt_set_high_memmap(). -Its counterpart ('region_base') will be introduced in next patch. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-3-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bea5f54720..ca098d40b8 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { -+ hwaddr region_size; -+ bool fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -- hwaddr size = extended_memmap[i].size; -- bool fits; -+ region_size = extended_memmap[i].size; - -- base = ROUND_UP(base, size); -+ base = ROUND_UP(base, region_size); - vms->memmap[i].base = base; -- vms->memmap[i].size = size; -+ vms->memmap[i].size = region_size; - - /* - * Check each device to see if they fit in the PA space, -@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, - * - * For each device that doesn't fit, disable it. 
- */ -- fits = (base + size) <= BIT_ULL(pa_bits); -+ fits = (base + region_size) <= BIT_ULL(pa_bits); - if (fits) { -- vms->highest_gpa = base + size - 1; -+ vms->highest_gpa = base + region_size - 1; - } - - switch (i) { -@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - break; - } - -- base += size; -+ base += region_size; - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch b/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch new file mode 100644 index 0000000..42ec705 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch @@ -0,0 +1,41 @@ +From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for + RHEL machines + +RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 +Upstream Status: RHEL only + +Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of +CPU cluster and NUMA node will be validated for 'virt-rhel*' machines. +A warning message will be printed if the boundary is broken. 
+ +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index df6a0231bc..faf68488d5 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, "acpi", "OnOffAuto", +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch new file mode 100644 index 0000000..fe9cd8c --- /dev/null +++ b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch @@ -0,0 +1,44 @@ +From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 25 Jul 2023 15:34:45 -0300 +Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type + <= pc-q35-rhel9.2.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0 +RH-Bugzilla: 2223691 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm) + +This is a downstream-only patch to that sets off the property +x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing +live migrations to RHEL9.2 happen successfully. 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691 +Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine +type < 8.0") +Signed-off-by: Leonardo Bras +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5ea52317b9..6f5117669d 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = { + { "virtio-mem", "x-early-migration", "false" }, + /* hw_compat_rhel_9_2 from hw_compat_7_2 */ + { "migration", "x-preempt-pre-7-2", "true" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, + }; + const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch new file mode 100644 index 0000000..164bea7 --- /dev/null +++ b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch @@ -0,0 +1,118 @@ +From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 2 May 2023 21:27:02 -0300 +Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine + type < 8.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 +RH-Bugzilla: 2189423 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm) + +Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK +set for machine types < 8.0 will cause migration to fail if the target +QEMU version is < 8.0.0 : + +qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0 
+qemu-system-x86_64: Failed to load PCIDevice:config +qemu-system-x86_64: Failed to load e1000e:parent_obj +qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e' +qemu-system-x86_64: load of migration failed: Invalid argument + +The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0, +with this cmdline: + +./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX] + +In order to fix this, property x-pcie-err-unc-mask was introduced to +control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by +default, but is disabled if machine type <= 7.2. + +Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register") +Suggested-by: Michael S. Tsirkin +Signed-off-by: Leonardo Bras +Message-Id: <20230503002701.854329-1-leobras@redhat.com> +Reviewed-by: Jonathan Cameron +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576 +Tested-by: Fiona Ebner +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f) +Signed-off-by: Leonardo Bras +--- + hw/core/machine.c | 1 + + hw/pci/pci.c | 2 ++ + hw/pci/pcie_aer.c | 11 +++++++---- + include/hw/pci/pci.h | 2 ++ + 4 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0e0120b7f2..c28702b690 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = { + { "e1000e", "migrate-timadj", "off" }, + { "virtio-mem", "x-early-migration", "false" }, + { "migration", "x-preempt-pre-7-2", "true" }, ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, + }; + const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index def5000e7b..8ad4349e96 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -79,6 +79,8 @@ static Property pci_props[] = { + DEFINE_PROP_STRING("failover_pair_id", PCIDevice, + failover_pair_id), + DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), ++ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, ++ QEMU_PCIE_ERR_UNC_MASK_BITNR, true), + DEFINE_PROP_END_OF_LIST() + }; + +diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c +index 103667c368..374d593ead 100644 +--- a/hw/pci/pcie_aer.c ++++ b/hw/pci/pcie_aer.c +@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, + + pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, + PCI_ERR_UNC_SUPPORTED); +- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- PCI_ERR_UNC_MASK_DEFAULT); +- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- PCI_ERR_UNC_SUPPORTED); ++ ++ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { ++ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ PCI_ERR_UNC_MASK_DEFAULT); ++ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ PCI_ERR_UNC_SUPPORTED); ++ } + + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, + 
PCI_ERR_UNC_SEVERITY_DEFAULT); +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index d5a40cd058..6dc6742fc4 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -207,6 +207,8 @@ enum { + QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), + #define QEMU_PCIE_CXL_BITNR 10 + QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), ++#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 ++ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), + }; + + typedef struct PCIINTxRoute { +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch b/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch new file mode 100644 index 0000000..08ee94f --- /dev/null +++ b/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch @@ -0,0 +1,470 @@ +From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with + qemu_bh_new_guarded + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit f63192b0544af5d3e4d5edfd85ab520fcf671377 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:09 2023 -0400 + + hw: replace most qemu_bh_new calls with qemu_bh_new_guarded + + This protects devices from bh->mmio reentrancy issues. + + Thanks: Thomas Huth for diagnosing OS X test failure. + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Michael S. 
Tsirkin + Reviewed-by: Paul Durrant + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-5-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/9pfs/xen-9p-backend.c | 5 ++++- + hw/block/dataplane/virtio-blk.c | 3 ++- + hw/block/dataplane/xen-block.c | 5 +++-- + hw/char/virtio-serial-bus.c | 3 ++- + hw/display/qxl.c | 9 ++++++--- + hw/display/virtio-gpu.c | 6 ++++-- + hw/ide/ahci.c | 3 ++- + hw/ide/ahci_internal.h | 1 + + hw/ide/core.c | 4 +++- + hw/misc/imx_rngc.c | 6 ++++-- + hw/misc/macio/mac_dbdma.c | 2 +- + hw/net/virtio-net.c | 3 ++- + hw/nvme/ctrl.c | 6 ++++-- + hw/scsi/mptsas.c | 3 ++- + hw/scsi/scsi-bus.c | 3 ++- + hw/scsi/vmw_pvscsi.c | 3 ++- + hw/usb/dev-uas.c | 3 ++- + hw/usb/hcd-dwc2.c | 3 ++- + hw/usb/hcd-ehci.c | 3 ++- + hw/usb/hcd-uhci.c | 2 +- + hw/usb/host-libusb.c | 6 ++++-- + hw/usb/redirect.c | 6 ++++-- + hw/usb/xen-usb.c | 3 ++- + hw/virtio/virtio-balloon.c | 5 +++-- + hw/virtio/virtio-crypto.c | 3 ++- + 25 files changed, 66 insertions(+), 33 deletions(-) + +diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c +index 74f3a05f88..0e266c552b 100644 +--- a/hw/9pfs/xen-9p-backend.c ++++ b/hw/9pfs/xen-9p-backend.c +@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { + + int num_rings; + Xen9pfsRing *rings; ++ MemReentrancyGuard mem_reentrancy_guard; + } Xen9pfsDev; + + static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); +@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) + xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + + XEN_FLEX_RING_SIZE(ring_order); + +- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); ++ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, ++ &xen_9pdev->rings[i], ++ &xen_9pdev->mem_reentrancy_guard); + xen_9pdev->rings[i].out_cons = 0; + xen_9pdev->rings[i].out_size = 0; + xen_9pdev->rings[i].inprogress = false; +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 
b28d81737e..a6202997ee 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + } else { + s->ctx = qemu_get_aio_context(); + } +- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); ++ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, ++ &DEVICE(vdev)->mem_reentrancy_guard); + s->batch_notify_vqs = bitmap_new(conf->num_queues); + + *dataplane = s; +diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c +index 734da42ea7..d8bc39d359 100644 +--- a/hw/block/dataplane/xen-block.c ++++ b/hw/block/dataplane/xen-block.c +@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, + } else { + dataplane->ctx = qemu_get_aio_context(); + } +- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, +- dataplane); ++ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, ++ dataplane, ++ &DEVICE(xendev)->mem_reentrancy_guard); + + return dataplane; + } +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index 7d4601cb5d..dd619f0731 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) + return; + } + +- port->bh = qemu_bh_new(flush_queued_data_bh, port); ++ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, ++ &dev->mem_reentrancy_guard); + port->elem = NULL; + } + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 80ce1e9a93..f1c0eb7dfc 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) + + qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); + +- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl); ++ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, ++ &DEVICE(qxl)->mem_reentrancy_guard); + 
qxl_reset_state(qxl); + +- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); +- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); ++ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, ++ &DEVICE(qxl)->mem_reentrancy_guard); ++ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, ++ &DEVICE(qxl)->mem_reentrancy_guard); + } + + static void qxl_realize_primary(PCIDevice *dev, Error **errp) +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index 5e15c79b94..66ac9b6cc5 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) + + g->ctrl_vq = virtio_get_queue(vdev, 0); + g->cursor_vq = virtio_get_queue(vdev, 1); +- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); +- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); ++ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, ++ &qdev->mem_reentrancy_guard); ++ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, ++ &qdev->mem_reentrancy_guard); + QTAILQ_INIT(&g->reslist); + QTAILQ_INIT(&g->cmdq); + QTAILQ_INIT(&g->fenceq); +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index 55902e1df7..4e76d6b191 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) + ahci_write_fis_d2h(ad); + + if (ad->port_regs.cmd_issue && !ad->check_bh) { +- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); ++ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, ++ &ad->mem_reentrancy_guard); + qemu_bh_schedule(ad->check_bh); + } + } +diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h +index 303fcd7235..2480455372 100644 +--- a/hw/ide/ahci_internal.h ++++ b/hw/ide/ahci_internal.h +@@ -321,6 +321,7 @@ struct AHCIDevice { + bool init_d2h_sent; + AHCICmdHdr *cur_cmd; + NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; ++ MemReentrancyGuard mem_reentrancy_guard; + }; + + 
struct AHCIPCIState { +diff --git a/hw/ide/core.c b/hw/ide/core.c +index 45d14a25e9..de48ff9f86 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( + BlockCompletionFunc *cb, void *cb_opaque, void *opaque) + { + IDEState *s = opaque; ++ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; + TrimAIOCB *iocb; + + /* Paired with a decrement in ide_trim_bh_cb() */ +@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( + + iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); + iocb->s = s; +- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); ++ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, ++ &DEVICE(dev)->mem_reentrancy_guard); + iocb->ret = 0; + iocb->qiov = qiov; + iocb->i = -1; +diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c +index 632c03779c..082c6980ad 100644 +--- a/hw/misc/imx_rngc.c ++++ b/hw/misc/imx_rngc.c +@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) + sysbus_init_mmio(sbd, &s->iomem); + + sysbus_init_irq(sbd, &s->irq); +- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); +- s->seed_bh = qemu_bh_new(imx_rngc_seed, s); ++ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, ++ &dev->mem_reentrancy_guard); ++ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, ++ &dev->mem_reentrancy_guard); + } + + static void imx_rngc_reset(DeviceState *dev) +diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c +index 43bb1f56ba..80a789f32b 100644 +--- a/hw/misc/macio/mac_dbdma.c ++++ b/hw/misc/macio/mac_dbdma.c +@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) + { + DBDMAState *s = MAC_DBDMA(dev); + +- s->bh = qemu_bh_new(DBDMA_run_bh, s); ++ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); + } + + static void mac_dbdma_class_init(ObjectClass *oc, void *data) +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 53e1c32643..447f669921 100644 +--- a/hw/net/virtio-net.c ++++ 
b/hw/net/virtio-net.c +@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) + n->vqs[index].tx_vq = + virtio_add_queue(vdev, n->net_conf.tx_queue_size, + virtio_net_handle_tx_bh); +- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); ++ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], ++ &DEVICE(vdev)->mem_reentrancy_guard); + } + + n->vqs[index].tx_waiting = 0; +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index ac24eeb5ed..e5a468975e 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, + QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); + } + +- sq->bh = qemu_bh_new(nvme_process_sq, sq); ++ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, ++ &DEVICE(sq->ctrl)->mem_reentrancy_guard); + + if (n->dbbuf_enabled) { + sq->db_addr = n->dbbuf_dbs + (sqid << 3); +@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, + } + } + n->cq[cqid] = cq; +- cq->bh = qemu_bh_new(nvme_post_cqes, cq); ++ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, ++ &DEVICE(cq->ctrl)->mem_reentrancy_guard); + } + + static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) +diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c +index c485da792c..3de288b454 100644 +--- a/hw/scsi/mptsas.c ++++ b/hw/scsi/mptsas.c +@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) + } + s->max_devices = MPTSAS_NUM_PORTS; + +- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); ++ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, ++ &DEVICE(dev)->mem_reentrancy_guard); + + scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); + } +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index c97176110c..3c20b47ad0 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool 
running, RunState state) + AioContext *ctx = blk_get_aio_context(s->conf.blk); + /* The reference is dropped in scsi_dma_restart_bh.*/ + object_ref(OBJECT(s)); +- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); ++ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, ++ &DEVICE(s)->mem_reentrancy_guard); + qemu_bh_schedule(s->bh); + } + } +diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c +index fa76696855..4de34536e9 100644 +--- a/hw/scsi/vmw_pvscsi.c ++++ b/hw/scsi/vmw_pvscsi.c +@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) + pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); + } + +- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); ++ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, ++ &DEVICE(pci_dev)->mem_reentrancy_guard); + + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); + /* override default SCSI bus hotplug-handler, with pvscsi's one */ +diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c +index 88f99c05d5..f013ded91e 100644 +--- a/hw/usb/dev-uas.c ++++ b/hw/usb/dev-uas.c +@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) + + QTAILQ_INIT(&uas->results); + QTAILQ_INIT(&uas->requests); +- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); ++ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, ++ &d->mem_reentrancy_guard); + + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); +diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c +index 8755e9cbb0..a0c4e782b2 100644 +--- a/hw/usb/hcd-dwc2.c ++++ b/hw/usb/hcd-dwc2.c +@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) + s->fi = USB_FRMINTVL - 1; + s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); +- s->async_bh = qemu_bh_new(dwc2_work_bh, s); ++ 
s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, ++ &dev->mem_reentrancy_guard); + + sysbus_init_irq(sbd, &s->irq); + } +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index d4da8dcb8d..c930c60921 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) + } + + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); +- s->async_bh = qemu_bh_new(ehci_work_bh, s); ++ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, ++ &dev->mem_reentrancy_guard); + s->device = dev; + + s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 8ac1175ad2..77baaa7a6b 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); + } + } +- s->bh = qemu_bh_new(uhci_bh, s); ++ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); + s->num_ports_vmstate = NB_PORTS; + QTAILQ_INIT(&s->queues); +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index 176868d345..f500db85ab 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) + static void usb_host_nodev(USBHostDevice *s) + { + if (!s->bh_nodev) { +- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); ++ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, ++ &DEVICE(s)->mem_reentrancy_guard); + } + qemu_bh_schedule(s->bh_nodev); + } +@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) + USBHostDevice *dev = opaque; + + if (!dev->bh_postld) { +- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); ++ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); + } + 
qemu_bh_schedule(dev->bh_postld); + dev->bh_postld_pending = true; +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index fd7df599bc..39fbaaab16 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) + } + } + +- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); +- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); ++ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); ++ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); + dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); + + packet_id_queue_init(&dev->cancelled, dev, "cancelled"); +diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c +index 66cb3f7c24..38ee660a30 100644 +--- a/hw/usb/xen-usb.c ++++ b/hw/usb/xen-usb.c +@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) + + QTAILQ_INIT(&usbif->req_free_q); + QSIMPLEQ_INIT(&usbif->hotplug_q); +- usbif->bh = qemu_bh_new(usbback_bh, usbif); ++ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, ++ &DEVICE(xendev)->mem_reentrancy_guard); + } + + static int usbback_free(struct XenLegacyDevice *xendev) +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 43092aa634..5186e831dd 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) + precopy_add_notifier(&s->free_page_hint_notify); + + object_ref(OBJECT(s->iothread)); +- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), +- virtio_ballloon_get_free_page_hints, s); ++ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), ++ virtio_ballloon_get_free_page_hints, s, ++ &dev->mem_reentrancy_guard); + } + + if (virtio_has_feature(s->host_features, 
VIRTIO_BALLOON_F_REPORTING)) { +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 802e1b9659..2fe804510f 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) + vcrypto->vqs[i].dataq = + virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); + vcrypto->vqs[i].dataq_bh = +- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); ++ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], ++ &dev->mem_reentrancy_guard); + vcrypto->vqs[i].vcrypto = vcrypto; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch b/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch new file mode 100644 index 0000000..efa966e --- /dev/null +++ b/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch @@ -0,0 +1,141 @@ +From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 29 May 2023 14:21:08 -0400 +Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI + controller (CVE-2023-0330) + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit b987718bbb1d0eabf95499b976212dd5f0120d75 +Author: Thomas Huth +Date: Mon May 22 11:10:11 2023 +0200 + + hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330) + + We cannot use the generic reentrancy guard in the LSI code, so + we have to manually prevent endless reentrancy here. 
The problematic + lsi_execute_script() function has already a way to detect whether + too many instructions have been executed - we just have to slightly + change the logic here that it also takes into account if the function + has been called too often in a reentrant way. + + The code in fuzz-lsi53c895a-test.c has been taken from an earlier + patch by Mauro Matteo Cascella. + + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563 + Message-Id: <20230522091011.1082574-1-thuth@redhat.com> + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Alexander Bulekov + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 23 +++++++++++++++------ + tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++ + 2 files changed, 50 insertions(+), 6 deletions(-) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index 048436352b..f7d45b0b20 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s) + uint32_t addr, addr_high; + int opcode; + int insn_processed = 0; ++ static int reentrancy_level; ++ ++ reentrancy_level++; + + s->istat1 |= LSI_ISTAT1_SRUN; + again: +- if (++insn_processed > LSI_MAX_INSN) { +- /* Some windows drivers make the device spin waiting for a memory +- location to change. If we have been executed a lot of code then +- assume this is the case and force an unexpected device disconnect. +- This is apparently sufficient to beat the drivers into submission. +- */ ++ /* ++ * Some windows drivers make the device spin waiting for a memory location ++ * to change. If we have executed more than LSI_MAX_INSN instructions then ++ * assume this is the case and force an unexpected device disconnect. This ++ * is apparently sufficient to beat the drivers into submission. ++ * ++ * Another issue (CVE-2023-0330) can occur if the script is programmed to ++ * trigger itself again and again. 
Avoid this problem by stopping after ++ * being called multiple times in a reentrant way (8 is an arbitrary value ++ * which should be enough for all valid use cases). ++ */ ++ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) { + if (!(s->sien0 & LSI_SIST0_UDC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "lsi_scsi: inf. loop with UDC masked"); +@@ -1596,6 +1605,8 @@ again: + } + } + trace_lsi_execute_script_stop(); ++ ++ reentrancy_level--; + } + + static uint8_t lsi_reg_readb(LSIState *s, int offset) +diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c +index 2012bd54b7..1b55928b9f 100644 +--- a/tests/qtest/fuzz-lsi53c895a-test.c ++++ b/tests/qtest/fuzz-lsi53c895a-test.c +@@ -8,6 +8,36 @@ + #include "qemu/osdep.h" + #include "libqtest.h" + ++/* ++ * This used to trigger a DMA reentrancy issue ++ * leading to memory corruption bugs like stack ++ * overflow or use-after-free ++ * https://gitlab.com/qemu-project/qemu/-/issues/1563 ++ */ ++static void test_lsi_dma_reentrancy(void) ++{ ++ QTestState *s; ++ ++ s = qtest_init("-M q35 -m 512M -nodefaults " ++ "-blockdev driver=null-co,node-name=null0 " ++ "-device lsi53c810 -device scsi-cd,drive=null0"); ++ ++ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */ ++ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */ ++ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */ ++ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/ ++ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */ ++ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/ ++ qtest_writel(s, 0xff000000, 0xc0000024); ++ qtest_writel(s, 0xff000114, 0x00000080); ++ qtest_writel(s, 0xff00012c, 0xff000000); ++ qtest_writel(s, 0xff000004, 0xff000114); ++ qtest_writel(s, 0xff000008, 0xff100014); ++ qtest_writel(s, 0xff10002f, 0x000000ff); ++ ++ qtest_quit(s); ++} ++ + /* + * This used to trigger a UAF in lsi_do_msgout() + * https://gitlab.com/qemu-project/qemu/-/issues/972 +@@ -124,5 +154,8 @@ int main(int argc, char 
**argv) + qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req", + test_lsi_do_msgout_cancel_req); + ++ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy", ++ test_lsi_dma_reentrancy); ++ + return g_test_run(); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch b/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch deleted file mode 100644 index b452281..0000000 --- a/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Thu, 23 Feb 2023 13:57:47 +0100 -Subject: [PATCH] hw/smbios: fix field corruption in type 4 table - -RH-Author: Julia Suvorova -RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table -RH-Bugzilla: 2169904 -RH-Acked-by: Igor Mammedov -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec - -Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the -strings which follow immediately after the struct fields have been -overwritten by unconditional filling of later fields such as core_count2. -Make these fields dependent on the SMBIOS version. - -Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4") -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904 - -Signed-off-by: Julia Suvorova -Message-Id: <20230223125747.254914-1-jusual@redhat.com> -Reviewed-by: Igor Mammedov -Reviewed-by: Ani Sinha -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b) ---- - hw/smbios/smbios.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index c5ad69237e..2d2ece3edb 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) - t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; - t->core_enabled = t->core_count; - -- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); -- - t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads; -- t->thread_count2 = cpu_to_le16(ms->smp.threads); - - t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ - t->processor_family2 = cpu_to_le16(0x01); /* Other */ - -+ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { -+ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); -+ t->thread_count2 = cpu_to_le16(ms->smp.threads); -+ } -+ - SMBIOS_BUILD_TABLE_POST; - smbios_type4_count++; - } --- -2.31.1 - diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch new file mode 100644 index 0000000..ffabd75 --- /dev/null +++ b/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch @@ -0,0 +1,76 @@ +From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0ddcb39c9357 
+Author: Alex Williamson +Date: Fri Jun 30 16:36:08 2023 -0600 + + hw/vfio/pci-quirks: Sanitize capability pointer + + Coverity reports a tained scalar when traversing the capabilities + chain (CID 1516589). In practice I've never seen a device with a + chain so broken as to cause an issue, but it's also pretty easy to + sanitize. + + Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques") + Signed-off-by: Alex Williamson + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci-quirks.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 0ed2fcd531..f4ff836805 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { + .set = set_nv_gpudirect_clique_id, + }; + ++static bool is_valid_std_cap_offset(uint8_t pos) ++{ ++ return (pos >= PCI_STD_HEADER_SIZEOF && ++ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF)); ++} ++ + static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + { + PCIDevice *pdev = &vdev->pdev; +@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + */ + ret = pread(vdev->vbasedev.fd, &tmp, 1, + vdev->config_offset + PCI_CAPABILITY_LIST); +- if (ret != 1 || !tmp) { ++ if (ret != 1 || !is_valid_std_cap_offset(tmp)) { + error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); + return -EINVAL; + } +@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + d4_conflict = true; + } + tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; +- } while (tmp); ++ } while (is_valid_std_cap_offset(tmp)); + + if (!c8_conflict) { + pos = 0xC8; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch 
new file mode 100644 index 0000000..99f5c75 --- /dev/null +++ b/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch @@ -0,0 +1,110 @@ +From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for + GPUDirect Cliques +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit f6b30c1984f7 +Author: Alex Williamson +Date: Thu Jun 8 12:05:07 2023 -0600 + + hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques + + NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset + previously reserved for use by hypervisors to implement the GPUDirect + Cliques capability. A revised specification provides an alternate + location. Add a config space walk to the quirk to check for conflicts, + allowing us to fall back to the new location or generate an error at the + quirk setup rather than when the real conflicting capability is added + should there be no available location. 
+ + Signed-off-by: Alex Williamson + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 40 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index f0147a050a..0ed2fcd531 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) + * +---------------------------------+---------------------------------+ + * + * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf ++ * ++ * Specification for Turning and later GPU architectures: ++ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf + */ + static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v, + const char *name, void *opaque, +@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { + static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + { + PCIDevice *pdev = &vdev->pdev; +- int ret, pos = 0xC8; ++ int ret, pos; ++ bool c8_conflict = false, d4_conflict = false; ++ uint8_t tmp; + + if (vdev->nv_gpudirect_clique == 0xFF) { + return 0; +@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + return -EINVAL; + } + ++ /* ++ * Per the updated specification above, it's recommended to use offset ++ * D4h for Turing and later GPU architectures due to a conflict of the ++ * MSI-X capability at C8h. We don't know how to determine the GPU ++ * architecture, instead we walk the capability chain to mark conflicts ++ * and choose one or error based on the result. ++ * ++ * NB. Cap list head in pdev->config is already cleared, read from device. 
++ */ ++ ret = pread(vdev->vbasedev.fd, &tmp, 1, ++ vdev->config_offset + PCI_CAPABILITY_LIST); ++ if (ret != 1 || !tmp) { ++ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); ++ return -EINVAL; ++ } ++ ++ do { ++ if (tmp == 0xC8) { ++ c8_conflict = true; ++ } else if (tmp == 0xD4) { ++ d4_conflict = true; ++ } ++ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; ++ } while (tmp); ++ ++ if (!c8_conflict) { ++ pos = 0xC8; ++ } else if (!d4_conflict) { ++ pos = 0xD4; ++ } else { ++ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space"); ++ return -EINVAL; ++ } ++ + ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp); + if (ret < 0) { + error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: "); +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch b/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch new file mode 100644 index 0000000..7a5963c --- /dev/null +++ b/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch @@ -0,0 +1,62 @@ +From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 17 Jul 2023 18:21:26 +0200 +Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in + virtio_iommu_handle_command() + +RH-Author: Eric Auger +RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes +RH-Bugzilla: 2229133 +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu +RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 + +In the virtio_iommu_handle_command() when a PROBE request is handled, +output_size takes a value greater than the tail size and on a subsequent +iteration we can get a stack out-of-band access. Initialize the +output_size on each iteration. + +The issue was found with ASAN. 
Credits to: +Yiming Tao(Zhejiang University) +Gaoning Pan(Zhejiang University) + +Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request") +Signed-off-by: Eric Auger +Reported-by: Mauro Matteo Cascella +Cc: qemu-stable@nongnu.org + +Message-Id: <20230717162126.11693-1-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 421e2a944f..17ce630200 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) + VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); + struct virtio_iommu_req_head head; + struct virtio_iommu_req_tail tail = {}; +- size_t output_size = sizeof(tail), sz; + VirtQueueElement *elem; + unsigned int iov_cnt; + struct iovec *iov; + void *buf = NULL; ++ size_t sz; + + for (;;) { ++ size_t output_size = sizeof(tail); ++ + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + return; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch b/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch new file mode 100644 index 0000000..3ee6b29 --- /dev/null +++ b/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch @@ -0,0 +1,52 @@ +From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Thu, 3 Aug 2023 14:29:15 -0400 +Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID + 0x8000001F is set + +RH-Author: Bandan Das +RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter +RH-Bugzilla: 2214839 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm) + 
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 + +commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8 +Author: Tom Lendacky +Date: Fri Sep 30 10:14:30 2022 -0500 + + i386/cpu: Update how the EBX register of CPUID 0x8000001F is set + + Update the setting of CPUID 0x8000001F EBX to clearly document the ranges + associated with fields being set. + + Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active") + Signed-off-by: Tom Lendacky + Reviewed-by: Dr. David Alan Gilbert + Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 839706b430..4ac3046313 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 
0x8 : 0; +- *ebx = sev_get_cbit_position(); +- *ebx |= sev_get_reduced_phys_bits() << 6; ++ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ ++ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; + default: +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch b/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch new file mode 100644 index 0000000..e9d28d3 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch @@ -0,0 +1,77 @@ +From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Thu, 3 Aug 2023 14:22:55 -0400 +Subject: [PATCH 12/14] i386/sev: Update checks and information related to + reduced-phys-bits + +RH-Author: Bandan Das +RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter +RH-Bugzilla: 2214839 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 + +commit 8168fed9f84e3128f7628969ae78af49433d5ce7 +Author: Tom Lendacky +Date: Fri Sep 30 10:14:29 2022 -0500 + + i386/sev: Update checks and information related to reduced-phys-bits + + The value of the reduced-phys-bits parameter is propogated to the CPUID + information exposed to the guest. Update the current validation check to + account for the size of the CPUID field (6-bits), ensuring the value is + in the range of 1 to 63. + + Maintain backward compatibility, to an extent, by allowing a value greater + than 1 (so that the previously documented value of 5 still works), but not + allowing anything over 63. + + Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context") + Signed-off-by: Tom Lendacky + Reviewed-by: Dr. 
David Alan Gilbert + Message-Id: + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/sev.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 859e06f6ad..fe2144c038 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); + host_cbitpos = ebx & 0x3f; + ++ /* ++ * The cbitpos value will be placed in bit positions 5:0 of the EBX ++ * register of CPUID 0x8000001F. No need to verify the range as the ++ * comparison against the host value accomplishes that. ++ */ + if (host_cbitpos != sev->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", + __func__, host_cbitpos, sev->cbitpos); + goto err; + } + +- if (sev->reduced_phys_bits < 1) { +- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1," +- " requested '%d'", __func__, sev->reduced_phys_bits); ++ /* ++ * The reduced-phys-bits value will be placed in bit positions 11:6 of ++ * the EBX register of CPUID 0x8000001F, so verify the supplied value ++ * is in the range of 1 to 63. 
++ */ ++ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { ++ error_setg(errp, "%s: reduced_phys_bits check failed," ++ " it should be in the range of 1 to 63, requested '%d'", ++ __func__, sev->reduced_phys_bits); + goto err; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch b/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch deleted file mode 100644 index 0f321e4..0000000 --- a/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch +++ /dev/null @@ -1,64 +0,0 @@ -From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Thu, 23 Feb 2023 14:59:21 +0800 -Subject: [PATCH 8/8] intel-iommu: fail DEVIOTLB_UNMAP without dt mode - -RH-Author: Laurent Vivier -RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode -RH-Bugzilla: 2156876 -RH-Acked-by: Eric Auger -RH-Acked-by: Peter Xu -RH-Acked-by: MST -RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos) - -Without dt mode, device IOTLB notifier won't work since guest won't -send device IOTLB invalidation descriptor in this case. Let's fail -early instead of misbehaving silently. - -Reviewed-by: Laurent Vivier -Tested-by: Laurent Vivier -Tested-by: Viktor Prutyanov -Buglink: https://bugzilla.redhat.com/2156876 -Signed-off-by: Jason Wang -Message-Id: <20230223065924.42503-3-jasowang@redhat.com> -Reviewed-by: Peter Xu -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3) - -Conflict in hw/i386/intel_iommu.c because of missing commit: - - 4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode") ---- - hw/i386/intel_iommu.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index a08ee85edf..d2983f40d3 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, - { - VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); - IntelIOMMUState *s = vtd_as->iommu_state; -+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - - /* TODO: add support for VFIO and vhost users */ - if (s->snoop_control) { -@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, - "Snoop Control with vhost or VFIO is not supported"); - return -ENOTSUP; - } -+ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) { -+ error_setg_errno(errp, ENOTSUP, -+ "device %02x.%02x.%x requires device IOTLB mode", -+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), -+ PCI_FUNC(vtd_as->devfn)); -+ return -ENOTSUP; -+ } - - /* Update per-address-space notifier flags */ - vtd_as->notifier_flags = new; --- -2.39.1 - diff --git a/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch b/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch deleted file mode 100644 index 22abf35..0000000 --- a/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch +++ /dev/null @@ -1,386 +0,0 @@ -From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001 -From: "manish.mishra" -Date: Tue, 20 Dec 2022 18:44:17 +0000 -Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 150: migration: Fix multifd crash on 
channel disorders -RH-Bugzilla: 2169732 -RH-Acked-by: quintela1 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Dr. David Alan Gilbert -RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm) - -MSG_PEEK peeks at the channel, The data is treated as unread and -the next read shall still return this data. This support is -currently added only for socket class. Extra parameter 'flags' -is added to io_readv calls to pass extra read flags like MSG_PEEK. - -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrange -Reviewed-by: Juan Quintela -Suggested-by: Daniel P. Berrange -Signed-off-by: manish.mishra -Signed-off-by: Juan Quintela -(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3) -Signed-off-by: Peter Xu ---- - chardev/char-socket.c | 4 ++-- - include/io/channel.h | 6 ++++++ - io/channel-buffer.c | 1 + - io/channel-command.c | 1 + - io/channel-file.c | 1 + - io/channel-null.c | 1 + - io/channel-socket.c | 19 ++++++++++++++++++- - io/channel-tls.c | 1 + - io/channel-websock.c | 1 + - io/channel.c | 16 ++++++++++++---- - migration/channel-block.c | 1 + - migration/rdma.c | 1 + - scsi/qemu-pr-helper.c | 2 +- - tests/qtest/tpm-emu.c | 2 +- - tests/unit/test-io-channel-socket.c | 1 + - util/vhost-user-server.c | 2 +- - 16 files changed, 50 insertions(+), 10 deletions(-) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 879564aa8a..5afce9a464 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) - if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { - ret = qio_channel_readv_full(s->ioc, &iov, 1, - &msgfds, &msgfds_num, -- NULL); -+ 0, NULL); - } else { - ret = qio_channel_readv_full(s->ioc, &iov, 1, - NULL, NULL, -- NULL); -+ 0, NULL); - } - - if (msgfds_num) { -diff --git a/include/io/channel.h b/include/io/channel.h -index c680ee7480..716235d496 100644 ---- a/include/io/channel.h -+++ b/include/io/channel.h 
-@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, - - #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 - -+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 -+ - typedef enum QIOChannelFeature QIOChannelFeature; - - enum QIOChannelFeature { -@@ -41,6 +43,7 @@ enum QIOChannelFeature { - QIO_CHANNEL_FEATURE_SHUTDOWN, - QIO_CHANNEL_FEATURE_LISTEN, - QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK, - }; - - -@@ -114,6 +117,7 @@ struct QIOChannelClass { - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp); - int (*io_close)(QIOChannel *ioc, - Error **errp); -@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc, - * @niov: the length of the @iov array - * @fds: pointer to an array that will received file handles - * @nfds: pointer filled with number of elements in @fds on return -+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) - * @errp: pointer to a NULL-initialized error object - * - * Read data from the IO channel, storing it in the -@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp); - - -diff --git a/io/channel-buffer.c b/io/channel-buffer.c -index bf52011be2..8096180f85 100644 ---- a/io/channel-buffer.c -+++ b/io/channel-buffer.c -@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); -diff --git a/io/channel-command.c b/io/channel-command.c -index 74516252ba..e7edd091af 100644 ---- a/io/channel-command.c -+++ b/io/channel-command.c -@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); -diff --git a/io/channel-file.c b/io/channel-file.c -index b67687c2aa..d76663e6ae 100644 ---- a/io/channel-file.c -+++ 
b/io/channel-file.c -@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); -diff --git a/io/channel-null.c b/io/channel-null.c -index 75e3781507..4fafdb770d 100644 ---- a/io/channel-null.c -+++ b/io/channel-null.c -@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc, - size_t niov, - int **fds G_GNUC_UNUSED, - size_t *nfds G_GNUC_UNUSED, -+ int flags, - Error **errp) - { - QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); -diff --git a/io/channel-socket.c b/io/channel-socket.c -index b76dca9cc1..7aca84f61a 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, - } - #endif - -+ qio_channel_set_feature(QIO_CHANNEL(ioc), -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); -+ - return 0; - } - -@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, - } - #endif /* WIN32 */ - -+ qio_channel_set_feature(QIO_CHANNEL(cioc), -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); -+ - trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); - return cioc; - -@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - - } - -+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { -+ sflags |= MSG_PEEK; -+ } -+ - retry: - ret = recvmsg(sioc->fd, &msg, sflags); - if (ret < 0) { -@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - ssize_t done = 0; - ssize_t i; -+ int sflags = 0; -+ -+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { -+ sflags |= MSG_PEEK; -+ } - - for (i = 0; i < niov; 
i++) { - ssize_t ret; -@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - ret = recv(sioc->fd, - iov[i].iov_base, - iov[i].iov_len, -- 0); -+ sflags); - if (ret < 0) { - if (errno == EAGAIN) { - if (done) { -diff --git a/io/channel-tls.c b/io/channel-tls.c -index 4ce890a538..c730cb8ec5 100644 ---- a/io/channel-tls.c -+++ b/io/channel-tls.c -@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); -diff --git a/io/channel-websock.c b/io/channel-websock.c -index fb4932ade7..a12acc27cf 100644 ---- a/io/channel-websock.c -+++ b/io/channel-websock.c -@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); -diff --git a/io/channel.c b/io/channel.c -index 0640941ac5..a8c7f11649 100644 ---- a/io/channel.c -+++ b/io/channel.c -@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); -@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - return -1; - } - -- return klass->io_readv(ioc, iov, niov, fds, nfds, errp); -+ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && -+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { -+ error_setg_errno(errp, EINVAL, -+ "Channel does not support peek read"); -+ return -1; -+ } -+ -+ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); - } - - -@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, - while ((nlocal_iov > 0) || local_fds) { - ssize_t len; - len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, -- local_nfds, errp); -+ local_nfds, 0, errp); - if (len == QIO_CHANNEL_ERR_BLOCK) { - if 
(qemu_in_coroutine()) { - qio_channel_yield(ioc, G_IO_IN); -@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc, - size_t niov, - Error **errp) - { -- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); -+ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); - } - - -@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, - Error **errp) - { - struct iovec iov = { .iov_base = buf, .iov_len = buflen }; -- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); -+ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); - } - - -diff --git a/migration/channel-block.c b/migration/channel-block.c -index f4ab53acdb..b7374363c3 100644 ---- a/migration/channel-block.c -+++ b/migration/channel-block.c -@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); -diff --git a/migration/rdma.c b/migration/rdma.c -index 94a55dd95b..d8b4632094 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); -diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c -index 196b78c00d..199227a556 100644 ---- a/scsi/qemu-pr-helper.c -+++ b/scsi/qemu-pr-helper.c -@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, - iov.iov_base = buf; - iov.iov_len = sz; - n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, -- &fds, &nfds, errp); -+ &fds, &nfds, 0, errp); - - if (n_read == QIO_CHANNEL_ERR_BLOCK) { - qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); -diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c -index 2994d1cf42..3cf1acaf7d 100644 ---- a/tests/qtest/tpm-emu.c -+++ b/tests/qtest/tpm-emu.c -@@ -106,7 +106,7 @@ void 
*tpm_emu_ctrl_thread(void *data) - int *pfd = NULL; - size_t nfd = 0; - -- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); -+ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); - cmd = be32_to_cpu(cmd); - g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); - g_assert_cmpint(nfd, ==, 1); -diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c -index b36a5d972a..b964bb202d 100644 ---- a/tests/unit/test-io-channel-socket.c -+++ b/tests/unit/test-io-channel-socket.c -@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void) - G_N_ELEMENTS(iorecv), - &fdrecv, - &nfdrecv, -+ 0, - &error_abort); - - g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); -diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c -index 232984ace6..145eb17c08 100644 ---- a/util/vhost-user-server.c -+++ b/util/vhost-user-server.c -@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) - * qio_channel_readv_full may have short reads, keeping calling it - * until getting VHOST_USER_HDR_SIZE or 0 bytes in total - */ -- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); -+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); - if (rc < 0) { - if (rc == QIO_CHANNEL_ERR_BLOCK) { - assert(local_err == NULL); --- -2.31.1 - diff --git a/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch b/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch index 95ae201..8d6795e 100644 --- a/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch +++ b/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch @@ -1,20 +1,19 @@ -From c13b4e32be9de900e7a55ebf5c341df8363e3b4a Mon Sep 17 00:00:00 2001 +From 0306736e3afbe7be99d01e4d70d1a5f2e38c32c2 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Tue, 15 Aug 2023 00:08:55 +0000 -Subject: [PATCH 4/4] io: remove io watch if TLS channel is closed during - handshake +Subject: [PATCH] io: 
remove io watch if TLS channel is closed during handshake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 301: io: remove io watch if TLS channel is closed during handshake -RH-Bugzilla: 2216503 +RH-MergeRequest: 315: io: remove io watch if TLS channel is closed during handshake +RH-Bugzilla: 2216504 RH-Acked-by: Peter Xu -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/1] 10bc3055a369a89996a1be34ce8d6c1fbc2c531e (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2) +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 5f23602074b2edde0d445d529f07434bd156202d (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216503 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216504 CVE: CVE-2023-3354 Upstream: Merged @@ -56,10 +55,10 @@ index 5672479e9e..26c67f17e2 100644 /** diff --git a/io/channel-tls.c b/io/channel-tls.c -index c730cb8ec5..bd79e78837 100644 +index 9805dd0a3f..847d5297c3 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c -@@ -195,12 +195,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc, +@@ -198,12 +198,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc, } trace_qio_channel_tls_handshake_pending(ioc, status); @@ -79,7 +78,7 @@ index c730cb8ec5..bd79e78837 100644 } } -@@ -215,6 +216,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc, +@@ -218,6 +219,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc, QIOChannelTLS *tioc = QIO_CHANNEL_TLS( qio_task_get_source(task)); @@ -87,7 +86,7 @@ index c730cb8ec5..bd79e78837 100644 g_free(data); qio_channel_tls_handshake_task(tioc, task, context); -@@ -375,6 +377,10 @@ static int qio_channel_tls_close(QIOChannel *ioc, +@@ -378,6 +380,10 @@ static int qio_channel_tls_close(QIOChannel *ioc, { QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); diff --git a/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch 
b/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch deleted file mode 100644 index 399acfc..0000000 --- a/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 6727e92a97f8ee9f367a41111bef3f5cad4a479a Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:02 +0200 -Subject: [PATCH 15/20] iotests/106, 214, 308: Read only one size line - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [10/12] 1554e0a92b92ed101a251478ccae43f45f6e071e (hreitz/qemu-kvm-c-9-s) - -These tests read size information (sometimes disk size, sometimes -virtual size) from qemu-img info's output. Once qemu-img starts -printing info about child nodes, we are going to see multiple instances -of that per image, but these tests are only interested in the first one, -so use "head -n 1" to get it. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-11-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 74163adda3101b127943f7cbbf8fcccd2d472426) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/106 | 4 ++-- - tests/qemu-iotests/214 | 6 ++++-- - tests/qemu-iotests/308 | 4 ++-- - 3 files changed, 8 insertions(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 -index 9d6adb542d..ae0fc46691 100755 ---- a/tests/qemu-iotests/106 -+++ b/tests/qemu-iotests/106 -@@ -66,7 +66,7 @@ for create_mode in off falloc full; do - expected_size=$((expected_size + $GROWTH_SIZE)) - fi - -- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') -+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) - actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') - - # The actual size may exceed the expected size, depending on the file -@@ -105,7 +105,7 @@ for growth_mode in falloc full; do - _make_test_img -o "extent_size_hint=0" 2G - $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K - -- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') -+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) - actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') - - if [ $actual_size -lt $GROWTH_SIZE ]; then -diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214 -index c66e246ba2..55ffcd7f44 100755 ---- a/tests/qemu-iotests/214 -+++ b/tests/qemu-iotests/214 -@@ -102,7 +102,8 @@ let data_size="8 * $cluster_size" - $QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \ - 2>&1 | _filter_qemu_io | _filter_testdir - sizeA=$($QEMU_IMG info --output=json "$TEST_IMG" | -- sed -n '/"actual-size":/ s/[^0-9]//gp') -+ sed -n '/"actual-size":/ s/[^0-9]//gp' | -+ head -n 1) - - _make_test_img 2M -o cluster_size=$cluster_size - 
echo "Write compressed data:" -@@ -124,7 +125,8 @@ $QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\ - _filter_qemu_io | _filter_testdir - - sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" | -- sed -n '/"actual-size":/ s/[^0-9]//gp') -+ sed -n '/"actual-size":/ s/[^0-9]//gp' | -+ head -n 1) - - if [ $sizeA -lt $sizeB ] - then -diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 -index bde4aac2fa..09275e9a10 100755 ---- a/tests/qemu-iotests/308 -+++ b/tests/qemu-iotests/308 -@@ -217,12 +217,12 @@ echo - echo '=== Remove export ===' - - # Double-check that $EXT_MP appears as a non-empty file (the raw image) --$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' -+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 - - fuse_export_del 'export-mp' - - # See that the file appears empty again --$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' -+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 - - echo - echo '=== Writable export ===' --- -2.31.1 - diff --git a/SOURCES/kvm-iotests-Filter-child-node-information.patch b/SOURCES/kvm-iotests-Filter-child-node-information.patch deleted file mode 100644 index 12eee3a..0000000 --- a/SOURCES/kvm-iotests-Filter-child-node-information.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 3102e62f80757729c97e58e2b3d62a6a9de952a7 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:01 +0200 -Subject: [PATCH 14/20] iotests: Filter child node information - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [9/12] 0b0a42d54397791f7f149e53c9175b7863707e70 (hreitz/qemu-kvm-c-9-s) - -Before we let qemu-img info print child node information, have -common.filter, common.rc, and iotests.py filter it from the test output -so we get as few reference output changes as possible. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-10-hreitz@redhat.com> -Tested-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit bcc6777ad6facede73c0cf8b1700045bf4365f7d) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/common.filter | 22 ++++++++++++++-------- - tests/qemu-iotests/common.rc | 22 ++++++++++++++-------- - tests/qemu-iotests/iotests.py | 18 +++++++++++++++--- - 3 files changed, 43 insertions(+), 19 deletions(-) - -diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index 6a13757177..6ddda2ee64 100644 ---- a/tests/qemu-iotests/common.filter -+++ b/tests/qemu-iotests/common.filter -@@ -224,6 +224,7 @@ _filter_img_info() - - discard=0 - regex_json_spec_start='^ *"format-specific": \{' -+ regex_json_child_start='^ *"children": \[' - gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ - -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ - -e "s#$TEST_DIR#TEST_DIR#g" \ -@@ -252,20 +253,25 @@ _filter_img_info() - -e 's/\(compression type: \)\(zlib\|zstd\)/\1COMPRESSION_TYPE/' \ - -e "s/uuid: [-a-f0-9]\\+/uuid: 00000000-0000-0000-0000-000000000000/" | \ - while IFS='' read -r line; do -- if [[ $format_specific == 1 ]]; then -- discard=0 -- elif [[ $line == "Format specific information:" ]]; then -- discard=1 -- elif [[ $line =~ $regex_json_spec_start ]]; then -- discard=2 -- regex_json_spec_end="^${line%%[^ ]*}\\},? *$" -+ if [[ $discard == 0 ]]; then -+ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then -+ discard=1 -+ elif [[ $line =~ "Child node '/" ]]; then -+ discard=1 -+ elif [[ $line =~ $regex_json_spec_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\},? *$" -+ elif [[ $line =~ $regex_json_child_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\],? *$" -+ fi - fi - if [[ $discard == 0 ]]; then - echo "$line" - elif [[ $discard == 1 && ! 
$line ]]; then - echo - discard=0 -- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then -+ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then - discard=0 - fi - done -diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc -index db757025cb..f4476b62f7 100644 ---- a/tests/qemu-iotests/common.rc -+++ b/tests/qemu-iotests/common.rc -@@ -711,6 +711,7 @@ _img_info() - - discard=0 - regex_json_spec_start='^ *"format-specific": \{' -+ regex_json_child_start='^ *"children": \[' - $QEMU_IMG info $QEMU_IMG_EXTRA_ARGS "$@" "$TEST_IMG" 2>&1 | \ - sed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ - -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -@@ -721,20 +722,25 @@ _img_info() - -e "/^disk size:/ D" \ - -e "/actual-size/ D" | \ - while IFS='' read -r line; do -- if [[ $format_specific == 1 ]]; then -- discard=0 -- elif [[ $line == "Format specific information:" ]]; then -- discard=1 -- elif [[ $line =~ $regex_json_spec_start ]]; then -- discard=2 -- regex_json_spec_end="^${line%%[^ ]*}\\},? *$" -+ if [[ $discard == 0 ]]; then -+ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then -+ discard=1 -+ elif [[ $line =~ "Child node '/" ]]; then -+ discard=1 -+ elif [[ $format_specific == 0 && $line =~ $regex_json_spec_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\},? *$" -+ elif [[ $line =~ $regex_json_child_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\],? *$" -+ fi - fi - if [[ $discard == 0 ]]; then - echo "$line" - elif [[ $discard == 1 && ! 
$line ]]; then - echo - discard=0 -- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then -+ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then - discard=0 - fi - done -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index da7d6637e1..94aeb3f3b2 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -329,7 +329,7 @@ def qemu_img_log(*args: str, check: bool = True - - def img_info_log(filename: str, filter_path: Optional[str] = None, - use_image_opts: bool = False, extra_args: Sequence[str] = (), -- check: bool = True, -+ check: bool = True, drop_child_info: bool = True, - ) -> None: - args = ['info'] - if use_image_opts: -@@ -342,7 +342,7 @@ def img_info_log(filename: str, filter_path: Optional[str] = None, - output = qemu_img(*args, check=check).stdout - if not filter_path: - filter_path = filename -- log(filter_img_info(output, filter_path)) -+ log(filter_img_info(output, filter_path, drop_child_info)) - - def qemu_io_wrap_args(args: Sequence[str]) -> List[str]: - if '-f' in args or '--image-opts' in args: -@@ -642,11 +642,23 @@ def _filter(_key, value): - def filter_generated_node_ids(msg): - return re.sub("#block[0-9]+", "NODE_NAME", msg) - --def filter_img_info(output, filename): -+def filter_img_info(output: str, filename: str, -+ drop_child_info: bool = True) -> str: - lines = [] -+ drop_indented = False - for line in output.split('\n'): - if 'disk size' in line or 'actual-size' in line: - continue -+ -+ # Drop child node info -+ if drop_indented: -+ if line.startswith(' '): -+ continue -+ drop_indented = False -+ if drop_child_info and "Child node '/" in line: -+ drop_indented = True -+ continue -+ - line = line.replace(filename, 'TEST_IMG') - line = filter_testfiles(line) - line = line.replace(imgfmt, 'IMGFMT') --- -2.31.1 - diff --git a/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch b/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch new 
file mode 100644 index 0000000..1fc5697 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch @@ -0,0 +1,144 @@ +From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:34 +0200 +Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm) + +This tests exercises graph locking, draining, and graph modifications +with AioContext switches a lot. Amongst others, it serves as a +regression test for bdrv_graph_wrlock() deadlocking because it is called +with a locked AioContext and for AioContext handling in the NBD server. + +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-4-kwolf@redhat.com> +Tested-by: Eric Blake +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/iotests.py | 4 ++ + .../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++-- + .../tests/graph-changes-while-io.out | 4 +- + 3 files changed, 58 insertions(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 3e82c634cf..7073579a7d 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ + assert self._qmp is not None + return self._qmp.cmd(cmd, args) + ++ def get_qmp(self) -> QEMUMonitorProtocol: ++ assert self._qmp is not None ++ return self._qmp ++ + def stop(self, kill_signal=15): + self._p.send_signal(kill_signal) + self._p.wait() +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io 
b/tests/qemu-iotests/tests/graph-changes-while-io +index 7664f33689..750e7d4d38 100755 +--- a/tests/qemu-iotests/tests/graph-changes-while-io ++++ b/tests/qemu-iotests/tests/graph-changes-while-io +@@ -22,19 +22,19 @@ + import os + from threading import Thread + import iotests +-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ +- QemuStorageDaemon ++from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ ++ QMPTestCase, QemuStorageDaemon + + + top = os.path.join(iotests.test_dir, 'top.img') + nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') + + +-def do_qemu_img_bench() -> None: ++def do_qemu_img_bench(count: int = 2000000) -> None: + """ + Do some I/O requests on `nbd_sock`. + """ +- qemu_img('bench', '-f', 'raw', '-c', '2000000', ++ qemu_img('bench', '-f', 'raw', '-c', str(count), + f'nbd+unix:///node0?socket={nbd_sock}') + + +@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): + + bench_thr.join() + ++ def test_commit_while_io(self) -> None: ++ # Run qemu-img bench in the background ++ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) ++ bench_thr.start() ++ ++ qemu_io('-c', 'write 0 64k', top) ++ qemu_io('-c', 'write 128k 64k', top) ++ ++ result = self.qsd.qmp('blockdev-add', { ++ 'driver': imgfmt, ++ 'node-name': 'overlay', ++ 'backing': None, ++ 'file': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('blockdev-snapshot', { ++ 'node': 'node0', ++ 'overlay': 'overlay', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ # While qemu-img bench is running, repeatedly commit overlay to node0 ++ while bench_thr.is_alive(): ++ result = self.qsd.qmp('block-commit', { ++ 'job-id': 'job0', ++ 'device': 'overlay', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('block-job-cancel', { ++ 'device': 'job0', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ cancelled = False ++ while not cancelled: ++ for event in 
self.qsd.get_qmp().get_events(wait=10.0): ++ if event['event'] != 'JOB_STATUS_CHANGE': ++ continue ++ if event['data']['status'] == 'null': ++ cancelled = True ++ ++ bench_thr.join() ++ + if __name__ == '__main__': + # Format must support raw backing files + iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out +index ae1213e6f8..fbc63e62f8 100644 +--- a/tests/qemu-iotests/tests/graph-changes-while-io.out ++++ b/tests/qemu-iotests/tests/graph-changes-while-io.out +@@ -1,5 +1,5 @@ +-. ++.. + ---------------------------------------------------------------------- +-Ran 1 tests ++Ran 2 tests + + OK +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch b/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch new file mode 100644 index 0000000..4e91505 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch @@ -0,0 +1,132 @@ +From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 9 May 2023 15:41:33 +0200 +Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm) + +This tests that trying to resize an image with QMP block_resize doesn't +hang or otherwise fail when the image is attached to a device running in +an iothread. + +This is a regression test for the recent fix that changed +qmp_block_resize, which is a coroutine based QMP handler, to avoid +calling no_coroutine_fns directly. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20230509134133.373408-1-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++ + tests/qemu-iotests/tests/iothreads-resize.out | 11 +++ + 2 files changed, 82 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iothreads-resize + create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out + +diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize +new file mode 100755 +index 0000000000..36e4598c62 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-resize +@@ -0,0 +1,71 @@ ++#!/usr/bin/env bash ++# group: rw auto quick ++# ++# Test resizing an image that is attached to a separate iothread ++# ++# Copyright (C) 2023 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=kwolf@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. 
./common.filter ++ ++# Resizing images is only supported by a few block drivers ++_supported_fmt raw qcow2 qed ++_supported_proto file ++_require_devices virtio-scsi-pci ++ ++size=64M ++_make_test_img $size ++ ++qmp() { ++cat < +Date: Thu, 11 May 2023 13:03:22 +0200 +Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not + deprecated in RHEL + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm) + +This is a downstream-only patch that is necessary because the default +CPU in RHEL is marked as deprecated. This makes test cases fail due to +the warning in the output: + +qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max' + +Fixes: 318178778db60b6475d1484509bee136317156d3 +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/testenv.py | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py +index 9a37ad9152..963514aab3 100644 +--- a/tests/qemu-iotests/testenv.py ++++ b/tests/qemu-iotests/testenv.py +@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, + if self.qemu_prog.endswith(f'qemu-system-{suffix}'): + self.qemu_options += f' -machine {machine}' + ++ if self.qemu_prog.endswith('qemu-system-x86_64'): ++ self.qemu_options += ' -cpu Nehalem' ++ + # QEMU_DEFAULT_MACHINE + self.qemu_default_machine = get_default_machine(self.qemu_prog) + +-- +2.39.1 + diff --git a/SOURCES/kvm-iotests-iov-padding-New-test.patch b/SOURCES/kvm-iotests-iov-padding-New-test.patch new file mode 100644 index 0000000..9ef37a2 --- /dev/null +++ b/SOURCES/kvm-iotests-iov-padding-New-test.patch @@ -0,0 +1,186 @@ +From add833b5de202d6765dda56c8773985fbe7f40a6 Mon 
Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:18 +0200 +Subject: [PATCH 4/9] iotests/iov-padding: New test + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s) + +Test that even vectored IO requests with 1024 vector elements that are +not aligned to the device's request alignment will succeed. + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-5-hreitz@redhat.com> +(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d) +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++ + tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++ + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iov-padding + create mode 100644 tests/qemu-iotests/tests/iov-padding.out + +diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding +new file mode 100755 +index 0000000000..b9604900c7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iov-padding +@@ -0,0 +1,85 @@ ++#!/usr/bin/env bash ++# group: rw quick ++# ++# Check the interaction of request padding (to fit alignment restrictions) with ++# vectored I/O from the guest ++# ++# Copyright Red Hat ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++seq=$(basename $0) ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt raw ++_supported_proto file ++ ++_make_test_img 1M ++ ++IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG" ++ ++# Four combinations: ++# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k ++# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not ++# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned ++# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not ++for start_offset in 4096 512; do ++ for last_element_length in 512 4096; do ++ length=$((1023 * 512 + $last_element_length)) ++ ++ echo ++ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) ==" ++ ++ # Fill with data for testing ++ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io ++ ++ # 1023 512-byte buffers, and then one with length $last_element_length ++ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length" ++ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ ++ -c "writev $cmd_params" \ ++ --image-opts \ ++ "$IMGSPEC" \ ++ | _filter_qemu_io ++ ++ # Read all patterns -- read the part we just wrote with writev twice, ++ # once "normally", and once with a readv, so we see that that works, too ++ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ ++ -c "read -P 1 0 $start_offset" \ ++ -c "read -P 2 $start_offset $length" \ ++ -c "readv $cmd_params" \ ++ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \ ++ --image-opts \ ++ "$IMGSPEC" \ ++ | _filter_qemu_io ++ done 
++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out +new file mode 100644 +index 0000000000..e07a91fac7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iov-padding.out +@@ -0,0 +1,59 @@ ++QA output created by iov-padding ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++ ++== performing 1024-element vectored requests to image (offset: 4096; length: 524288) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 520192/520192 bytes at offset 528384 ++508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 4096; length: 527872) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 516608/516608 bytes at offset 531968 ++504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 512; length: 524288) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 
524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 523776/523776 bytes at offset 524800 ++511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 512; length: 527872) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 520192/520192 bytes at offset 528384 ++508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++*** done +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-Atomic-memslot-updates.patch b/SOURCES/kvm-kvm-Atomic-memslot-updates.patch deleted file mode 100644 index 14e9e32..0000000 --- a/SOURCES/kvm-kvm-Atomic-memslot-updates.patch +++ /dev/null @@ -1,286 +0,0 @@ -From e13fdc97ff05cdee46c112c2dee70b6ef33e7fa7 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:17:31 -0500 -Subject: [PATCH 31/31] kvm: Atomic memslot updates - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 9f03181ebcad2474fbe859acbce7b9891caa216b (eesposit/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39 -Author: David Hildenbrand -Date: Fri Nov 11 10:47:58 2022 -0500 - - kvm: Atomic memslot updates - - If we update an existing memslot (e.g., resize, split), we temporarily - remove the memslot to re-add it immediately afterwards. These updates - are not atomic, especially not for KVM VCPU threads, such that we can - get spurious faults. - - Let's inhibit most KVM ioctls while performing relevant updates, such - that we can perform the update just as if it would happen atomically - without additional kernel support. - - We capture the add/del changes and apply them in the notifier commit - stage instead. There, we can check for overlaps and perform the ioctl - inhibiting only if really required (-> overlap). - - To keep things simple we don't perform additional checks that wouldn't - actually result in an overlap -- such as !RAM memory regions in some - cases (see kvm_set_phys_mem()). - - To minimize cache-line bouncing, use a separate indicator - (in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock - while performing both actions (removing+re-adding). - - We have to wait until all IOCTLs were exited and block new ones from - getting executed. - - This approach cannot result in a deadlock as long as the inhibitor does - not hold any locks that might hinder an IOCTL from getting finished and - exited - something fairly unusual. The inhibitor will always hold the BQL. - - AFAIKs, one possible candidate would be userfaultfd. If a page cannot be - placed (e.g., during postcopy), because we're waiting for a lock, or if the - userfaultfd thread cannot process a fault, because it is waiting for a - lock, there could be a deadlock. However, the BQL is not applicable here, - because any other guest memory access while holding the BQL would already - result in a deadlock. 
- - Nothing else in the kernel should block forever and wait for userspace - intervention. - - Note: pause_all_vcpus()/resume_all_vcpus() or - start_exclusive()/end_exclusive() cannot be used, as they either drop - the BQL or require to be called without the BQL - something inhibitors - cannot handle. We need a low-level locking mechanism that is - deadlock-free even when not releasing the BQL. - - Signed-off-by: David Hildenbrand - Signed-off-by: Emanuele Giuseppe Esposito - Tested-by: Emanuele Giuseppe Esposito - Message-Id: <20221111154758.1372674-4-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++----- - include/sysemu/kvm_int.h | 8 ++++ - 2 files changed, 98 insertions(+), 11 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ff660fd469..39ed30ab59 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -31,6 +31,7 @@ - #include "sysemu/kvm_int.h" - #include "sysemu/runstate.h" - #include "sysemu/cpus.h" -+#include "sysemu/accel-blocker.h" - #include "qemu/bswap.h" - #include "exec/memory.h" - #include "exec/ram_addr.h" -@@ -46,6 +47,7 @@ - #include "sysemu/hw_accel.h" - #include "kvm-cpus.h" - #include "sysemu/dirtylimit.h" -+#include "qemu/range.h" - - #include "hw/boards.h" - #include "monitor/stats.h" -@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) - kvm_max_slot_size = max_slot_size; - } - -+/* Called with KVMMemoryListener.slots_lock held */ - static void kvm_set_phys_mem(KVMMemoryListener *kml, - MemoryRegionSection *section, bool add) - { -@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - ram = memory_region_get_ram_ptr(mr) + mr_offset; - ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; - -- kvm_slots_lock(); -- - if (!add) { - do { - slot_size = MIN(kvm_max_slot_size, size); - mem = kvm_lookup_matching_slot(kml, start_addr, 
slot_size); - if (!mem) { -- goto out; -+ return; - } - if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - /* -@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - start_addr += slot_size; - size -= slot_size; - } while (size); -- goto out; -+ return; - } - - /* register the new slot */ -@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - ram += slot_size; - size -= slot_size; - } while (size); -- --out: -- kvm_slots_unlock(); - } - - static void *kvm_dirty_ring_reaper_thread(void *data) -@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { - KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); -+ KVMMemoryUpdate *update; -+ -+ update = g_new0(KVMMemoryUpdate, 1); -+ update->section = *section; - -- memory_region_ref(section->mr); -- kvm_set_phys_mem(kml, section, true); -+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); - } - - static void kvm_region_del(MemoryListener *listener, - MemoryRegionSection *section) - { - KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); -+ KVMMemoryUpdate *update; -+ -+ update = g_new0(KVMMemoryUpdate, 1); -+ update->section = *section; -+ -+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); -+} -+ -+static void kvm_region_commit(MemoryListener *listener) -+{ -+ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, -+ listener); -+ KVMMemoryUpdate *u1, *u2; -+ bool need_inhibit = false; -+ -+ if (QSIMPLEQ_EMPTY(&kml->transaction_add) && -+ QSIMPLEQ_EMPTY(&kml->transaction_del)) { -+ return; -+ } -+ -+ /* -+ * We have to be careful when regions to add overlap with ranges to remove. -+ * We have to simulate atomic KVM memslot updates by making sure no ioctl() -+ * is currently active. -+ * -+ * The lists are order by addresses, so it's easy to find overlaps. 
-+ */ -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); -+ u2 = QSIMPLEQ_FIRST(&kml->transaction_add); -+ while (u1 && u2) { -+ Range r1, r2; -+ -+ range_init_nofail(&r1, u1->section.offset_within_address_space, -+ int128_get64(u1->section.size)); -+ range_init_nofail(&r2, u2->section.offset_within_address_space, -+ int128_get64(u2->section.size)); -+ -+ if (range_overlaps_range(&r1, &r2)) { -+ need_inhibit = true; -+ break; -+ } -+ if (range_lob(&r1) < range_lob(&r2)) { -+ u1 = QSIMPLEQ_NEXT(u1, next); -+ } else { -+ u2 = QSIMPLEQ_NEXT(u2, next); -+ } -+ } -+ -+ kvm_slots_lock(); -+ if (need_inhibit) { -+ accel_ioctl_inhibit_begin(); -+ } -+ -+ /* Remove all memslots before adding the new ones. */ -+ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) { -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); -+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next); - -- kvm_set_phys_mem(kml, section, false); -- memory_region_unref(section->mr); -+ kvm_set_phys_mem(kml, &u1->section, false); -+ memory_region_unref(u1->section.mr); -+ -+ g_free(u1); -+ } -+ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) { -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_add); -+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next); -+ -+ memory_region_ref(u1->section.mr); -+ kvm_set_phys_mem(kml, &u1->section, true); -+ -+ g_free(u1); -+ } -+ -+ if (need_inhibit) { -+ accel_ioctl_inhibit_end(); -+ } -+ kvm_slots_unlock(); - } - - static void kvm_log_sync(MemoryListener *listener, -@@ -1610,8 +1685,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, - kml->slots[i].slot = i; - } - -+ QSIMPLEQ_INIT(&kml->transaction_add); -+ QSIMPLEQ_INIT(&kml->transaction_del); -+ - kml->listener.region_add = kvm_region_add; - kml->listener.region_del = kvm_region_del; -+ kml->listener.commit = kvm_region_commit; - kml->listener.log_start = kvm_log_start; - kml->listener.log_stop = kvm_log_stop; - kml->listener.priority = 10; -diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h -index 
3b4adcdc10..60b520a13e 100644 ---- a/include/sysemu/kvm_int.h -+++ b/include/sysemu/kvm_int.h -@@ -12,6 +12,7 @@ - #include "exec/memory.h" - #include "qapi/qapi-types-common.h" - #include "qemu/accel.h" -+#include "qemu/queue.h" - #include "sysemu/kvm.h" - - typedef struct KVMSlot -@@ -31,10 +32,17 @@ typedef struct KVMSlot - ram_addr_t ram_start_offset; - } KVMSlot; - -+typedef struct KVMMemoryUpdate { -+ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; -+ MemoryRegionSection section; -+} KVMMemoryUpdate; -+ - typedef struct KVMMemoryListener { - MemoryListener listener; - KVMSlot *slots; - int as_id; -+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; -+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; - } KVMMemoryListener; - - #define KVM_MSI_HASHTAB_SIZE 256 --- -2.31.1 - diff --git a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch b/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch index 16db441..d6a6d73 100644 --- a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +++ b/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch @@ -1,15 +1,20 @@ -From 6319eaee8c2206c4eca858a11ed7c9b7a2f3dff9 Mon Sep 17 00:00:00 2001 +From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti -Date: Thu, 29 Jun 2023 15:13:57 -0300 -Subject: [PATCH] kvm: reuse per-vcpu stats fd to avoid vcpu interruption +Date: Thu, 29 Jun 2023 14:48:32 -0300 +Subject: [PATCH 5/6] kvm: reuse per-vcpu stats fd to avoid vcpu interruption +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit RH-Author: Marcelo Tosatti -RH-MergeRequest: 290: kvm: reuse per-vcpu stats fd to avoid vcpu interruption -RH-Bugzilla: 2221219 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] cf6c3188e26c6eae99b48db1f75837e11d1e4489 +RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption +RH-Bugzilla: 2218644 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Paolo 
Bonzini +RH-Acked-by: Leonardo Brás +RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214884 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644 Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d A regression has been detected in latency testing of KVM guests. @@ -32,7 +37,7 @@ Signed-off-by: Paolo Bonzini 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 39ed30ab59..c86a6798c6 100644 +index cf3a88d90e..fa7ca46c66 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) @@ -44,7 +49,7 @@ index 39ed30ab59..c86a6798c6 100644 err: return ret; } -@@ -3950,7 +3952,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd +@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd /* Read stats header */ kvm_stats_header = &descriptors->kvm_stats_header; @@ -53,7 +58,7 @@ index 39ed30ab59..c86a6798c6 100644 if (ret != sizeof(*kvm_stats_header)) { error_setg(errp, "KVM stats: failed to read stats header: " "expected %zu actual %zu", -@@ -3981,7 +3983,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd +@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd } static void query_stats(StatsResultList **result, StatsTarget target, @@ -63,7 +68,7 @@ index 39ed30ab59..c86a6798c6 100644 { struct kvm_stats_desc *kvm_stats_desc; struct kvm_stats_header *kvm_stats_header; -@@ -4039,7 +4042,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, +@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, break; case STATS_TARGET_VCPU: add_stats_entry(result, STATS_PROVIDER_KVM, @@ -72,7 +77,7 @@ index 39ed30ab59..c86a6798c6 100644 stats_list); break; default: 
-@@ -4076,10 +4079,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, +@@ -4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list); } @@ -85,7 +90,7 @@ index 39ed30ab59..c86a6798c6 100644 Error *local_err = NULL; if (stats_fd == -1) { -@@ -4088,14 +4090,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) +@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) return; } query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU, @@ -104,7 +109,7 @@ index 39ed30ab59..c86a6798c6 100644 Error *local_err = NULL; if (stats_fd == -1) { -@@ -4105,7 +4106,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) +@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) } query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd, kvm_stats_args->errp); @@ -112,7 +117,7 @@ index 39ed30ab59..c86a6798c6 100644 } static void query_stats_cb(StatsResultList **result, StatsTarget target, -@@ -4123,7 +4123,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, +@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, error_setg_errno(errp, errno, "KVM stats: ioctl failed"); return; } @@ -121,7 +126,7 @@ index 39ed30ab59..c86a6798c6 100644 close(stats_fd); break; } -@@ -4137,7 +4137,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, +@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) { continue; } @@ -130,7 +135,7 @@ index 39ed30ab59..c86a6798c6 100644 } break; } -@@ -4163,6 +4163,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) +@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList 
**result, Error **errp) if (first_cpu) { stats_args.result.schema = result; stats_args.errp = errp; @@ -139,10 +144,10 @@ index 39ed30ab59..c86a6798c6 100644 } } diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 2417597236..362f22ca06 100644 +index 397fd3ac68..ae96be07e7 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h -@@ -397,6 +397,7 @@ struct CPUState { +@@ -399,6 +399,7 @@ struct CPUState { struct kvm_dirty_gfn *kvm_dirty_gfns; uint32_t kvm_fetch_index; uint64_t dirty_pages; diff --git a/SOURCES/kvm-linux-headers-Update-to-v6.1.patch b/SOURCES/kvm-linux-headers-Update-to-v6.1.patch deleted file mode 100644 index 6ce9c7d..0000000 --- a/SOURCES/kvm-linux-headers-Update-to-v6.1.patch +++ /dev/null @@ -1,577 +0,0 @@ -From cbe35c6a4794107ea1ddecf0b381ba4b1c8799f5 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 7 Feb 2023 15:57:10 -0500 -Subject: [PATCH 3/8] linux-headers: Update to v6.1 - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 15d97026e802a0f01b5f80f81fb4414dc69b2b2d (peterx/qemu-kvm) - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Cornelia Huck -Signed-off-by: Juan Quintela -(cherry picked from commit 93e0932b7be2498024cd6ba8446a0fa2cb1769bc) -Signed-off-by: Peter Xu ---- - include/standard-headers/drm/drm_fourcc.h | 34 ++++- - include/standard-headers/linux/ethtool.h | 63 +++++++- - include/standard-headers/linux/fuse.h | 6 +- - .../linux/input-event-codes.h | 1 + - include/standard-headers/linux/virtio_blk.h | 19 +++ - linux-headers/asm-generic/hugetlb_encode.h | 26 ++-- - linux-headers/asm-generic/mman-common.h | 2 + - linux-headers/asm-mips/mman.h | 2 + - linux-headers/asm-riscv/kvm.h | 4 + - linux-headers/linux/kvm.h | 1 + - linux-headers/linux/psci.h | 14 ++ - linux-headers/linux/userfaultfd.h | 4 + - linux-headers/linux/vfio.h | 142 ++++++++++++++++++ - 13 files changed, 298 insertions(+), 20 deletions(-) - -diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h -index 48b620cbef..b868488f93 100644 ---- a/include/standard-headers/drm/drm_fourcc.h -+++ b/include/standard-headers/drm/drm_fourcc.h -@@ -98,18 +98,42 @@ extern "C" { - #define DRM_FORMAT_INVALID 0 - - /* color index */ -+#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */ -+#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ - #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ - --/* 8 bpp Red */ -+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+ -+/* 2 bpp Darkness (inverse 
relationship between channel value and brightness) */ -+#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */ -+ -+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ -+ -+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ -+ -+/* 1 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+ -+/* 2 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */ -+ -+/* 4 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */ -+ -+/* 8 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ - --/* 10 bpp Red */ -+/* 10 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ - --/* 12 bpp Red */ -+/* 12 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ - --/* 16 bpp Red */ -+/* 16 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ - - /* 16 bpp RG */ -@@ -204,7 +228,9 @@ extern "C" { - #define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ - - #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') 
/* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ -+#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */ - #define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ -+#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */ - #define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ - #define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ - -diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h -index 4537da20cc..1dc56cdc0a 100644 ---- a/include/standard-headers/linux/ethtool.h -+++ b/include/standard-headers/linux/ethtool.h -@@ -736,6 +736,51 @@ enum ethtool_module_power_mode { - ETHTOOL_MODULE_POWER_MODE_HIGH, - }; - -+/** -+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE -+ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are -+ * unknown -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled -+ */ -+enum ethtool_podl_pse_admin_state { -+ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, -+ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, -+ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, -+}; -+ -+/** -+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. 
-+ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is -+ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is -+ * false" -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is -+ * asserted true when either of the PSE state diagram variables -+ * pi_detecting or pi_classifying is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” -+ * is asserted true when the PoDL PSE state diagram variable pi_powered is -+ * true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted -+ * true when the PoDL PSE state diagram variable pi_sleeping is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true -+ * when the logical combination of the PoDL PSE state diagram variables -+ * pi_prebiased*!pi_sleeping is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted -+ * true when the PoDL PSE state diagram variable overload_held is true." -+ */ -+enum ethtool_podl_pse_pw_d_status { -+ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, -+}; -+ - /** - * struct ethtool_gstrings - string set for data tagging - * @cmd: Command number = %ETHTOOL_GSTRINGS -@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex) - #define MASTER_SLAVE_STATE_SLAVE 3 - #define MASTER_SLAVE_STATE_ERR 4 - -+/* These are used to throttle the rate of data on the phy interface when the -+ * native speed of the interface is higher than the link speed. These should -+ * not be used for phy interfaces which natively support multiple speeds (e.g. 
-+ * MII or SGMII). -+ */ -+/* No rate matching performed. */ -+#define RATE_MATCH_NONE 0 -+/* The phy sends pause frames to throttle the MAC. */ -+#define RATE_MATCH_PAUSE 1 -+/* The phy asserts CRS to prevent the MAC from transmitting. */ -+#define RATE_MATCH_CRS 2 -+/* The MAC is programmed with a sufficiently-large IPG. */ -+#define RATE_MATCH_OPEN_LOOP 3 -+ - /* Which connector port. */ - #define PORT_TP 0x00 - #define PORT_AUI 0x01 -@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags { - * reported consistently by PHYLIB. Read-only. - * @master_slave_cfg: Master/slave port mode. - * @master_slave_state: Master/slave port state. -+ * @rate_matching: Rate adaptation performed by the PHY - * @reserved: Reserved for future use; see the note on reserved space. -- * @reserved1: Reserved for future use; see the note on reserved space. - * @link_mode_masks: Variable length bitmaps. - * - * If autonegotiation is disabled, the speed and @duplex represent the -@@ -2085,7 +2144,7 @@ struct ethtool_link_settings { - uint8_t transceiver; - uint8_t master_slave_cfg; - uint8_t master_slave_state; -- uint8_t reserved1[1]; -+ uint8_t rate_matching; - uint32_t reserved[7]; - uint32_t link_mode_masks[]; - /* layout of link_mode_masks fields: -diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h -index bda06258be..713d259768 100644 ---- a/include/standard-headers/linux/fuse.h -+++ b/include/standard-headers/linux/fuse.h -@@ -194,6 +194,9 @@ - * - add FUSE_SECURITY_CTX init flag - * - add security context to create, mkdir, symlink, and mknod requests - * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX -+ * -+ * 7.37 -+ * - add FUSE_TMPFILE - */ - - #ifndef _LINUX_FUSE_H -@@ -225,7 +228,7 @@ - #define FUSE_KERNEL_VERSION 7 - - /** Minor version number of this interface */ --#define FUSE_KERNEL_MINOR_VERSION 36 -+#define FUSE_KERNEL_MINOR_VERSION 37 - - /** The node ID of the root inode */ - #define FUSE_ROOT_ID 1 -@@ -533,6 +536,7 @@ enum fuse_opcode { - 
FUSE_SETUPMAPPING = 48, - FUSE_REMOVEMAPPING = 49, - FUSE_SYNCFS = 50, -+ FUSE_TMPFILE = 51, - - /* CUSE specific operations */ - CUSE_INIT = 4096, -diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h -index 50790aee5a..815f7a1dff 100644 ---- a/include/standard-headers/linux/input-event-codes.h -+++ b/include/standard-headers/linux/input-event-codes.h -@@ -862,6 +862,7 @@ - #define ABS_TOOL_WIDTH 0x1c - - #define ABS_VOLUME 0x20 -+#define ABS_PROFILE 0x21 - - #define ABS_MISC 0x28 - -diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h -index 2dcc90826a..e81715cd70 100644 ---- a/include/standard-headers/linux/virtio_blk.h -+++ b/include/standard-headers/linux/virtio_blk.h -@@ -40,6 +40,7 @@ - #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ - #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ - #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ -+#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ - - /* Legacy feature bits */ - #ifndef VIRTIO_BLK_NO_LEGACY -@@ -119,6 +120,21 @@ struct virtio_blk_config { - uint8_t write_zeroes_may_unmap; - - uint8_t unused1[3]; -+ -+ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ -+ /* -+ * The maximum secure erase sectors (in 512-byte sectors) for -+ * one segment. -+ */ -+ __virtio32 max_secure_erase_sectors; -+ /* -+ * The maximum number of secure erase segments in a -+ * secure erase command. -+ */ -+ __virtio32 max_secure_erase_seg; -+ /* Secure erase commands must be aligned to this number of sectors. */ -+ __virtio32 secure_erase_sector_alignment; -+ - } QEMU_PACKED; - - /* -@@ -153,6 +169,9 @@ struct virtio_blk_config { - /* Write zeroes command */ - #define VIRTIO_BLK_T_WRITE_ZEROES 13 - -+/* Secure erase command */ -+#define VIRTIO_BLK_T_SECURE_ERASE 14 -+ - #ifndef VIRTIO_BLK_NO_LEGACY - /* Barrier before this op. 
*/ - #define VIRTIO_BLK_T_BARRIER 0x80000000 -diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h -index 4f3d5aaa11..de687009bf 100644 ---- a/linux-headers/asm-generic/hugetlb_encode.h -+++ b/linux-headers/asm-generic/hugetlb_encode.h -@@ -20,18 +20,18 @@ - #define HUGETLB_FLAG_ENCODE_SHIFT 26 - #define HUGETLB_FLAG_ENCODE_MASK 0x3f - --#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define 
HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT) - - #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ -diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h -index 6c1aa92a92..6ce1f1ceb4 100644 ---- a/linux-headers/asm-generic/mman-common.h -+++ b/linux-headers/asm-generic/mman-common.h -@@ -77,6 +77,8 @@ - - #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ - -+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ -+ - /* compatibility flags */ - #define MAP_FILE 0 - -diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h -index 1be428663c..c6e1fc77c9 100644 ---- a/linux-headers/asm-mips/mman.h -+++ b/linux-headers/asm-mips/mman.h -@@ -103,6 +103,8 @@ - - #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ - -+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ -+ - /* compatibility flags */ - #define MAP_FILE 0 - -diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h -index 7351417afd..8985ff234c 100644 ---- a/linux-headers/asm-riscv/kvm.h -+++ b/linux-headers/asm-riscv/kvm.h -@@ -48,6 +48,7 @@ struct kvm_sregs { - /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ - struct kvm_riscv_config { - unsigned long isa; -+ unsigned long zicbom_block_size; - }; - - /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ -@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID { - KVM_RISCV_ISA_EXT_M, - KVM_RISCV_ISA_EXT_SVPBMT, - KVM_RISCV_ISA_EXT_SSTC, -+ KVM_RISCV_ISA_EXT_SVINVAL, -+ KVM_RISCV_ISA_EXT_ZIHINTPAUSE, -+ KVM_RISCV_ISA_EXT_ZICBOM, - KVM_RISCV_ISA_EXT_MAX, - }; - -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index ebdafa576d..b2783c5202 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -1175,6 +1175,7 @@ struct 
kvm_ppc_resize_hpt { - #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 - #define KVM_CAP_S390_ZPCI_OP 221 - #define KVM_CAP_S390_CPU_TOPOLOGY 222 -+#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 - - #ifdef KVM_CAP_IRQ_ROUTING - -diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h -index 213b2a0f70..e60dfd8907 100644 ---- a/linux-headers/linux/psci.h -+++ b/linux-headers/linux/psci.h -@@ -48,12 +48,26 @@ - #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) - - #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) -+#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11) -+#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12) -+#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13) - #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) - #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) -+#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16) -+#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17) -+ - #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) -+#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) -+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) - -+#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) -+#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) - #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) -+#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16) -+#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) -+ - #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) -+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) - - /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ - #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff -diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h -index a3a377cd44..ba5d0df52f 100644 ---- a/linux-headers/linux/userfaultfd.h -+++ b/linux-headers/linux/userfaultfd.h -@@ -12,6 +12,10 @@ - - #include - -+/* ioctls for /dev/userfaultfd */ -+#define USERFAULTFD_IOC 0xAA -+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) -+ - /* - * If the UFFDIO_API is upgraded 
someday, the UFFDIO_UNREGISTER and - * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index ede44b5572..bee7e42198 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -986,6 +986,148 @@ enum vfio_device_mig_state { - VFIO_DEVICE_STATE_RUNNING_P2P = 5, - }; - -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power -+ * state with the platform-based power management. Device use of lower power -+ * states depends on factors managed by the runtime power management core, -+ * including system level support and coordinating support among dependent -+ * devices. Enabling device low power entry does not guarantee lower power -+ * usage by the device, nor is a mechanism provided through this feature to -+ * know the current power state of the device. If any device access happens -+ * (either from the host or through the vfio uAPI) when the device is in the -+ * low power state, then the host will move the device out of the low power -+ * state as necessary prior to the access. Once the access is completed, the -+ * device may re-enter the low power state. For single shot low power support -+ * with wake-up notification, see -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd -+ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after -+ * calling LOW_POWER_EXIT. -+ */ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 -+ -+/* -+ * This device feature has the same behavior as -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user -+ * provides an eventfd for wake-up notification. When the device moves out of -+ * the low power state for the wake-up, the host will not allow the device to -+ * re-enter a low power state without a subsequent user call to one of the low -+ * power entry device feature IOCTLs. 
Access to mmap'd device regions is -+ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the -+ * low power exit. The low power exit can happen either through LOW_POWER_EXIT -+ * or through any other access (where the wake-up notification has been -+ * generated). The access to mmap'd device regions will not trigger low power -+ * exit. -+ * -+ * The notification through the provided eventfd will be generated only when -+ * the device has entered and is resumed from a low power state after -+ * calling this device feature IOCTL. A device that has not entered low power -+ * state, as managed through the runtime power management core, will not -+ * generate a notification through the provided eventfd on access. Calling the -+ * LOW_POWER_EXIT feature is optional in the case where notification has been -+ * signaled on the provided eventfd that a resume from low power has occurred. -+ */ -+struct vfio_device_low_power_entry_with_wakeup { -+ __s32 wakeup_eventfd; -+ __u32 reserved; -+}; -+ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as -+ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. -+ * This device feature IOCTL may itself generate a wakeup eventfd notification -+ * in the latter case if the device had previously entered a low power state. -+ */ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. -+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports -+ * DMA logging. -+ * -+ * DMA logging allows a device to internally record what DMAs the device is -+ * initiating and report them back to userspace. It is part of the VFIO -+ * migration infrastructure that allows implementing dirty page tracking -+ * during the pre copy phase of live migration. 
Only DMA WRITEs are logged, -+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. -+ * -+ * When DMA logging is started a range of IOVAs to monitor is provided and the -+ * device can optimize its logging to cover only the IOVA range given. Each -+ * DMA that the device initiates inside the range will be logged by the device -+ * for later retrieval. -+ * -+ * page_size is an input that hints what tracking granularity the device -+ * should try to achieve. If the device cannot do the hinted page size then -+ * it's the driver choice which page size to pick based on its support. -+ * On output the device will return the page size it selected. -+ * -+ * ranges is a pointer to an array of -+ * struct vfio_device_feature_dma_logging_range. -+ * -+ * The core kernel code guarantees to support by minimum num_ranges that fit -+ * into a single kernel page. User space can try higher values but should give -+ * up if the above can't be achieved as of some driver limitations. -+ * -+ * A single call to start device DMA logging can be issued and a matching stop -+ * should follow at the end. Another start is not allowed in the meantime. -+ */ -+struct vfio_device_feature_dma_logging_control { -+ __aligned_u64 page_size; -+ __u32 num_ranges; -+ __u32 __reserved; -+ __aligned_u64 ranges; -+}; -+ -+struct vfio_device_feature_dma_logging_range { -+ __aligned_u64 iova; -+ __aligned_u64 length; -+}; -+ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started -+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START -+ */ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log -+ * -+ * Query the device's DMA log for written pages within the given IOVA range. -+ * During querying the log is cleared for the IOVA range. 
-+ * -+ * bitmap is a pointer to an array of u64s that will hold the output bitmap -+ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits -+ * is given by: -+ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) -+ * -+ * The input page_size can be any power of two value and does not have to -+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver -+ * will format its internal logging to match the reporting page size, possibly -+ * by replicating bits if the internal page size is lower than requested. -+ * -+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or -+ * perform any initialization of the user provided bitmap. -+ * -+ * If any error is returned userspace should assume that the dirty log is -+ * corrupted. Error recovery is to consider all memory dirty and try to -+ * restart the dirty tracking, or to abort/restart the whole migration. -+ * -+ * If DMA logging is not enabled, an error will be returned. -+ * -+ */ -+struct vfio_device_feature_dma_logging_report { -+ __aligned_u64 iova; -+ __aligned_u64 length; -+ __aligned_u64 page_size; -+ __aligned_u64 bitmap; -+}; -+ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 -+ - /* -------- API for Type1 VFIO IOMMU -------- */ - - /** --- -2.31.1 - diff --git a/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch b/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch new file mode 100644 index 0000000..c1100a5 --- /dev/null +++ b/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch @@ -0,0 +1,53 @@ +From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/13] 
02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 6d0589e0e6c64b888864a2bf980537be20389264 +Author: Alexander Bulekov +Date: Sat May 6 07:21:45 2023 -0400 + + loongarch: mark loongarch_ipi_iocsr re-entrnacy safe + + loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send + function. As such, mark these MRs re-entrancy-safe. + + Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues") + Signed-off-by: Alexander Bulekov + Reviewed-by: Song Gao + Message-Id: <20230506112145.3563708-1-alxndr@bu.edu> + Signed-off-by: Song Gao + +Signed-off-by: Jon Maloy +--- + hw/intc/loongarch_ipi.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index aa4bf9eb74..40e98af2ce 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj) + for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) { + memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops, + &lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48); ++ ++ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ ++ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true; ++ + sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]); + + memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops, +-- +2.39.3 + diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch new file mode 100644 index 0000000..359d53f --- /dev/null +++ b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch @@ -0,0 +1,70 @@ +From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 29 May 2023 14:21:08 -0400 +Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO + region, too + +RH-Author: Jon Maloy 
+RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e +Author: Thomas Huth +Date: Tue May 16 11:05:56 2023 +0200 + + lsi53c895a: disable reentrancy detection for MMIO region, too + + While trying to use a SCSI disk on the LSI controller with an + older version of Fedora (25), I'm getting: + + qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34 + + and the SCSI controller is not usable. Seems like we have to + disable the reentrancy checker for the MMIO region, too, to + get this working again. + + The problem could be reproduced it like this: + + ./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \ + -device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \ + -drive if=none,id=d0,file=.../somedisk.qcow2 \ + -cdrom Fedora-Everything-netinst-i386-25-1.3.iso + + Where somedisk.qcow2 is an image that contains already some partitions + and file systems. + + In the boot menu of Fedora, go to + "Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell" + + Then check "dmesg | grep -i 53c" for failure messages, and try to mount + a partition from somedisk.qcow2. + + Message-Id: <20230516090556.553813-1-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index db27872963..048436352b 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) + * re-entrancy guard. 
+ */ + s->ram_io.disable_reentrancy_guard = true; ++ s->mmio_io.disable_reentrancy_guard = true; + + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + qdev_init_gpio_out(d, &s->ext_irq, 1); +-- +2.39.3 + diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch new file mode 100644 index 0000000..e671c92 --- /dev/null +++ b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch @@ -0,0 +1,58 @@ +From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:10 2023 -0400 + + lsi53c895a: disable reentrancy detection for script RAM + + As the code is designed to use the memory APIs to access the script ram, + disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion. + + In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion. 
+ + Reported-by: Fiona Ebner + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-6-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index af93557a9a..db27872963 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) + memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, + "lsi-io", 256); + ++ /* ++ * Since we use the address-space API to interact with ram_io, disable the ++ * re-entrancy guard. ++ */ ++ s->ram_io.disable_reentrancy_guard = true; ++ + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + qdev_init_gpio_out(d, &s->ext_irq, 1); + +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch b/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch new file mode 100644 index 0000000..d3697dc --- /dev/null +++ b/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch @@ -0,0 +1,150 @@ +From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 05/21] memory: prevent dma-reentracy issues + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 +CVE: CVE-2023-0330 + +commit a2e1753b8054344f32cf94f31c6399a58794a380 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:06 2023 -0400 + + memory: prevent dma-reentracy issues + + Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. 
+ This flag is set/checked prior to calling a device's MemoryRegion + handlers, and set when device code initiates DMA. The purpose of this + flag is to prevent two types of DMA-based reentrancy issues: + + 1.) mmio -> dma -> mmio case + 2.) bh -> dma write -> mmio case + + These issues have led to problems such as stack-exhaustion and + use-after-frees. + + Summary of the problem from Peter Maydell: + https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com + + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 + Resolves: CVE-2023-0330 + + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> + [thuth: Replace warn_report() with warn_report_once()] + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + include/exec/memory.h | 5 +++++ + include/hw/qdev-core.h | 7 +++++++ + softmmu/memory.c | 16 ++++++++++++++++ + 3 files changed, 28 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 15ade918ba..e45ce6061f 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -767,6 +767,8 @@ struct MemoryRegion { + bool is_iommu; + RAMBlock *ram_block; + Object *owner; ++ /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ ++ DeviceState *dev; + + const MemoryRegionOps *ops; + void *opaque; +@@ -791,6 +793,9 @@ struct MemoryRegion { + unsigned ioeventfd_nb; + MemoryRegionIoeventfd *ioeventfds; + RamDiscardManager *rdm; /* Only for RAM */ ++ ++ /* For devices designed to perform re-entrant IO into their own IO MRs */ ++ bool disable_reentrancy_guard; + }; + + struct IOMMUMemoryRegion { +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index bd50ad5ee1..7623703943 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -162,6 +162,10 @@ struct NamedClockList { + QLIST_ENTRY(NamedClockList) node; + }; + ++typedef struct { ++ bool engaged_in_io; ++} MemReentrancyGuard; ++ + /** + * DeviceState: + * @realized: Indicates whether the device has been fully constructed. +@@ -194,6 +198,9 @@ struct DeviceState { + int alias_required_for_version; + ResettableState reset; + GSList *unplug_blockers; ++ ++ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ ++ MemReentrancyGuard mem_reentrancy_guard; + }; + + struct DeviceListener { +diff --git a/softmmu/memory.c b/softmmu/memory.c +index b1a6cae6f5..b7b3386e9d 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_size_max = 4; + } + ++ /* Do not allow more than one simultaneous access to a device's IO Regions */ ++ if (mr->dev && !mr->disable_reentrancy_guard && ++ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { ++ if (mr->dev->mem_reentrancy_guard.engaged_in_io) { ++ warn_report_once("Blocked re-entrant IO on MemoryRegion: " ++ "%s at addr: 0x%" HWADDR_PRIX, ++ memory_region_name(mr), addr); ++ return MEMTX_ACCESS_ERROR; ++ } ++ mr->dev->mem_reentrancy_guard.engaged_in_io = true; ++ } ++ + /* FIXME: support unaligned access? 
*/ + access_size = MAX(MIN(size, access_size_max), access_size_min); + access_mask = MAKE_64BIT_MASK(0, access_size * 8); +@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_mask, attrs); + } + } ++ if (mr->dev) { ++ mr->dev->mem_reentrancy_guard.engaged_in_io = false; ++ } + return r; + } + +@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, + } + mr->name = g_strdup(name); + mr->owner = owner; ++ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); + mr->ram_block = NULL; + + if (name) { +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch b/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch new file mode 100644 index 0000000..f45abea --- /dev/null +++ b/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch @@ -0,0 +1,67 @@ +From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 7 Jun 2023 11:45:09 -0400 +Subject: [PATCH 15/21] memory: stricter checks prior to unsetting + engaged_in_io + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3 +Author: Alexander Bulekov +Date: Tue May 16 04:40:02 2023 -0400 + + memory: stricter checks prior to unsetting engaged_in_io + + engaged_in_io could be unset by an MR with re-entrancy checks disabled. + Ensure that only MRs that can set the engaged_in_io flag can unset it. 
+ + Signed-off-by: Alexander Bulekov + Message-Id: <20230516084002.3813836-1-alxndr@bu.edu> + Reviewed-by: Darren Kenny + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + softmmu/memory.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/softmmu/memory.c b/softmmu/memory.c +index b7b3386e9d..26424f1d78 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + unsigned access_size; + unsigned i; + MemTxResult r = MEMTX_OK; ++ bool reentrancy_guard_applied = false; + + if (!access_size_min) { + access_size_min = 1; +@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; ++ reentrancy_guard_applied = true; + } + + /* FIXME: support unaligned access? */ +@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_mask, attrs); + } + } +- if (mr->dev) { ++ if (mr->dev && reentrancy_guard_applied) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } + return r; +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch b/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch new file mode 100644 index 0000000..b94ba7c --- /dev/null +++ b/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch @@ -0,0 +1,186 @@ +From d831672c4f1d41d863823584173452b89e754e26 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 11 Sep 2023 16:10:19 +0200 +Subject: [PATCH 3/4] migration: Add .save_prepare() handler to struct + SaveVMHandlers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled +RH-Bugzilla: 2229868 +RH-Acked-by: Alex Williamson +RH-Acked-by: Peter Xu +RH-Commit: [3/4] 
b3154a736764ae4430561d7f5c298ab4c6ef9e01 + +Bugzilla: https://bugzilla.redhat.com/2229868 + +commit 08fc4cb51774f763dcc6fd74637aa9e00eb6a0ba +Author: Avihai Horon +Date: Wed Sep 6 18:08:51 2023 +0300 + + migration: Add .save_prepare() handler to struct SaveVMHandlers + + Add a new .save_prepare() handler to struct SaveVMHandlers. This handler + is called early, even before migration starts, and can be used by + devices to perform early checks. + + Refactor migrate_init() to be able to return errors and call + .save_prepare() from there. + + Suggested-by: Peter Xu + Signed-off-by: Avihai Horon + Reviewed-by: Peter Xu + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c + context change in migrate_init() due to missing commit + aff3f6606d14 ("migration: Rename ram_counters to mig_stats") + context change in migrate_prepare() due to missing commit + 87c22901094a ("migration: Move migrate_set_block_incremental() + to options.c") + +Signed-off-by: Cédric Le Goater +--- + include/migration/register.h | 5 +++++ + migration/migration.c | 15 +++++++++++++-- + migration/migration.h | 2 +- + migration/savevm.c | 29 ++++++++++++++++++++++++++++- + migration/savevm.h | 1 + + 5 files changed, 48 insertions(+), 4 deletions(-) + +diff --git a/include/migration/register.h b/include/migration/register.h +index 90914f32f5..2b12c6adec 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -20,6 +20,11 @@ typedef struct SaveVMHandlers { + /* This runs inside the iothread lock. */ + SaveStateHandler *save_state; + ++ /* ++ * save_prepare is called early, even before migration starts, and can be ++ * used to perform early checks. 
++ */ ++ int (*save_prepare)(void *opaque, Error **errp); + void (*save_cleanup)(void *opaque); + int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); + int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); +diff --git a/migration/migration.c b/migration/migration.c +index a85c8936d9..cdaa757e23 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1389,8 +1389,15 @@ bool migration_is_active(MigrationState *s) + s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); + } + +-void migrate_init(MigrationState *s) ++int migrate_init(MigrationState *s, Error **errp) + { ++ int ret; ++ ++ ret = qemu_savevm_state_prepare(errp); ++ if (ret) { ++ return ret; ++ } ++ + /* + * Reinitialise all migration state, except + * parameters/capabilities that the user set, and +@@ -1429,6 +1436,8 @@ void migrate_init(MigrationState *s) + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); + migration_reset_vfio_bytes_transferred(); ++ ++ return 0; + } + + int migrate_add_blocker_internal(Error *reason, Error **errp) +@@ -1638,7 +1647,9 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + migrate_set_block_incremental(s, true); + } + +- migrate_init(s); ++ if (migrate_init(s, errp)) { ++ return false; ++ } + + return true; + } +diff --git a/migration/migration.h b/migration/migration.h +index c5b98485e3..cfbe7c390d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -465,7 +465,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in); + bool migration_is_setup_or_active(int state); + bool migration_is_running(int state); + +-void migrate_init(MigrationState *s); ++int migrate_init(MigrationState *s, Error **errp); + bool migration_is_blocked(Error **errp); + /* True if outgoing migration has entered postcopy phase */ + bool migration_in_postcopy(void); +diff --git a/migration/savevm.c b/migration/savevm.c +index 13c1a9afa1..2913563d6e 100644 +--- 
a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1231,6 +1231,30 @@ bool qemu_savevm_state_guest_unplug_pending(void) + return false; + } + ++int qemu_savevm_state_prepare(Error **errp) ++{ ++ SaveStateEntry *se; ++ int ret; ++ ++ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { ++ if (!se->ops || !se->ops->save_prepare) { ++ continue; ++ } ++ if (se->ops->is_active) { ++ if (!se->ops->is_active(se->opaque)) { ++ continue; ++ } ++ } ++ ++ ret = se->ops->save_prepare(se->opaque, errp); ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ + void qemu_savevm_state_setup(QEMUFile *f) + { + MigrationState *ms = migrate_get_current(); +@@ -1617,7 +1641,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + return -EINVAL; + } + +- migrate_init(ms); ++ ret = migrate_init(ms, errp); ++ if (ret) { ++ return ret; ++ } + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +diff --git a/migration/savevm.h b/migration/savevm.h +index e894bbc143..74669733dd 100644 +--- a/migration/savevm.h ++++ b/migration/savevm.h +@@ -31,6 +31,7 @@ + + bool qemu_savevm_state_blocked(Error **errp); + void qemu_savevm_non_migratable_list(strList **reasons); ++int qemu_savevm_state_prepare(Error **errp); + void qemu_savevm_state_setup(QEMUFile *f); + bool qemu_savevm_state_guest_unplug_pending(void); + int qemu_savevm_state_resume_prepare(MigrationState *s); +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch b/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch new file mode 100644 index 0000000..5cba9f4 --- /dev/null +++ b/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch @@ -0,0 +1,139 @@ +From f053185a7fb9fab2a41c0a5ae4e1a403bc99a9a0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 11 Sep 2023 16:10:19 +0200 +Subject: [PATCH 1/4] migration: Add migration prefix to functions in target.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled +RH-Bugzilla: 2229868 +RH-Acked-by: Alex Williamson +RH-Acked-by: Peter Xu +RH-Commit: [1/4] 4594d2035423385690d7f1feb5f2e4c8f0be74f5 + +Bugzilla: https://bugzilla.redhat.com/2229868 + +commit 38c482b4778595ee337761f73ec0730d6c47b404 +Author: Avihai Horon +Date: Wed Sep 6 18:08:48 2023 +0300 + + migration: Add migration prefix to functions in target.c + + The functions in target.c are not static, yet they don't have a proper + migration prefix. Add such prefix. + + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c, migration/savevm.c + context changes in migrate_prepare() and qemu_savevm_state() due + to missing commit aff3f6606d14 ("migration: Rename ram_counters + to mig_stats") + +Signed-off-by: Cédric Le Goater +--- + migration/migration.c | 6 +++--- + migration/migration.h | 4 ++-- + migration/savevm.c | 2 +- + migration/target.c | 8 ++++---- + 4 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 47ad6c43cb..5aa9e5dada 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1021,7 +1021,7 @@ static void fill_source_migration_info(MigrationInfo *info) + populate_time_info(info, s); + populate_ram_info(info, s); + populate_disk_info(info); +- populate_vfio_info(info); ++ migration_populate_vfio_info(info); + break; + case MIGRATION_STATUS_COLO: + info->has_status = true; +@@ -1030,7 +1030,7 @@ static void fill_source_migration_info(MigrationInfo *info) + case MIGRATION_STATUS_COMPLETED: + populate_time_info(info, s); + populate_ram_info(info, s); +- populate_vfio_info(info); ++ migration_populate_vfio_info(info); + break; + case MIGRATION_STATUS_FAILED: + info->has_status = true; +@@ -1638,7 +1638,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, 
bool blk_inc, + */ + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); +- reset_vfio_bytes_transferred(); ++ migration_reset_vfio_bytes_transferred(); + + return true; + } +diff --git a/migration/migration.h b/migration/migration.h +index dfec649af8..c5b98485e3 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -505,8 +505,8 @@ void migration_consume_urgent_request(void); + bool migration_rate_limit(void); + void migration_cancel(const Error *error); + +-void populate_vfio_info(MigrationInfo *info); +-void reset_vfio_bytes_transferred(void); ++void migration_populate_vfio_info(MigrationInfo *info); ++void migration_reset_vfio_bytes_transferred(void); + void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); + + #endif +diff --git a/migration/savevm.c b/migration/savevm.c +index 83088fc3f8..05db79bfad 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1620,7 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + migrate_init(ms); + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); +- reset_vfio_bytes_transferred(); ++ migration_reset_vfio_bytes_transferred(); + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +diff --git a/migration/target.c b/migration/target.c +index f39c9a8d88..a6ffa9a5ce 100644 +--- a/migration/target.c ++++ b/migration/target.c +@@ -15,7 +15,7 @@ + #endif + + #ifdef CONFIG_VFIO +-void populate_vfio_info(MigrationInfo *info) ++void migration_populate_vfio_info(MigrationInfo *info) + { + if (vfio_mig_active()) { + info->vfio = g_malloc0(sizeof(*info->vfio)); +@@ -23,16 +23,16 @@ void populate_vfio_info(MigrationInfo *info) + } + } + +-void reset_vfio_bytes_transferred(void) ++void migration_reset_vfio_bytes_transferred(void) + { + vfio_reset_bytes_transferred(); + } + #else +-void populate_vfio_info(MigrationInfo *info) ++void migration_populate_vfio_info(MigrationInfo 
*info) + { + } + +-void reset_vfio_bytes_transferred(void) ++void migration_reset_vfio_bytes_transferred(void) + { + } + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Add-switchover-ack-capability.patch b/SOURCES/kvm-migration-Add-switchover-ack-capability.patch new file mode 100644 index 0000000..399c9ed --- /dev/null +++ b/SOURCES/kvm-migration-Add-switchover-ack-capability.patch @@ -0,0 +1,162 @@ +From 8f89d3bc8f226cd038bf88b9fb3ef43b0fb33034 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 10/37] migration: Add switchover ack capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/28] 2f4ca020783bd617eca13b18289fce764279833b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 6574232fff6a +Author: Avihai Horon +Date: Wed Jun 21 14:11:54 2023 +0300 + + migration: Add switchover ack capability + + Migration downtime estimation is calculated based on bandwidth and + remaining migration data. This assumes that loading of migration data in + the destination takes a negligible amount of time and that downtime + depends only on network speed. + + While this may be true for RAM, it's not necessarily true for other + migrated devices. For example, loading the data of a VFIO device in the + destination might require from the device to allocate resources, prepare + internal data structures and so on. These operations can take a + significant amount of time which can increase migration downtime. + + This patch adds a new capability "switchover ack" that prevents the + source from stopping the VM and completing the migration until an ACK + is received from the destination that it's OK to do so. 
+ + This can be used by migrated devices in various ways to reduce downtime. + For example, a device can send initial precopy metadata to pre-allocate + resources in the destination and use this capability to make sure that + the pre-allocation is completed before the source VM is stopped, so it + will have full effect. + + This new capability relies on the return path capability to communicate + from the destination back to the source. + + The actual implementation of the capability will be added in the + following patches. + + Signed-off-by: Avihai Horon + Reviewed-by: Peter Xu + Acked-by: Markus Armbruster + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - qapi/migration.json + re-indent of @switchover-ack to avoid ../qapi/migration.json:482:1: + unexpected de-indent (expected at least 17 spaces) + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + qapi/migration.json | 14 +++++++++++++- + 3 files changed, 35 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index a76984276d..c3df6c6dde 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -182,6 +182,8 @@ Property migration_properties[] = { + DEFINE_PROP_MIG_CAP("x-zero-copy-send", + MIGRATION_CAPABILITY_ZERO_COPY_SEND), + #endif ++ DEFINE_PROP_MIG_CAP("x-switchover-ack", ++ MIGRATION_CAPABILITY_SWITCHOVER_ACK), + + DEFINE_PROP_END_OF_LIST(), + }; +@@ -305,6 +307,13 @@ bool migrate_return_path(void) + return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + ++bool migrate_switchover_ack(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK]; ++} ++ + bool migrate_validate_uuid(void) + { + MigrationState *s = migrate_get_current(); +@@ -532,6 +541,18 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + } + } + ++ if 
(new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK]) { ++ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) { ++ error_setg(errp, "Capability 'switchover-ack' requires capability " ++ "'return-path'"); ++ return false; ++ } ++ ++ /* Disable this capability until it's implemented */ ++ error_setg(errp, "'switchover-ack' is not implemented yet"); ++ return false; ++ } ++ + return true; + } + +diff --git a/migration/options.h b/migration/options.h +index 7b0f7245ad..0fc7be6869 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -47,6 +47,7 @@ bool migrate_postcopy_ram(void); + bool migrate_rdma_pin_all(void); + bool migrate_release_ram(void); + bool migrate_return_path(void); ++bool migrate_switchover_ack(void); + bool migrate_validate_uuid(void); + bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); +diff --git a/qapi/migration.json b/qapi/migration.json +index 2c35b7b9cf..b6a58347cc 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -478,6 +478,18 @@ + # should not affect the correctness of postcopy migration. + # (since 7.1) + # ++# @switchover-ack: If enabled, migration will not stop the source VM ++# and complete the migration until an ACK is received ++# from the destination that it's OK to do so. ++# Exactly when this ACK is sent depends on the ++# migrated devices that use this feature. For ++# example, a device can use it to make sure some of ++# its data is sent and loaded in the destination ++# before doing switchover. This can reduce downtime ++# if devices that support this capability are ++# present. 'return-path' capability must be enabled ++# to use it. (since 8.1) ++# + # Features: + # @unstable: Members @x-colo and @x-ignore-shared are experimental. 
+ # +@@ -492,7 +504,7 @@ + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, + 'validate-uuid', 'background-snapshot', +- 'zero-copy-send', 'postcopy-preempt'] } ++ 'zero-copy-send', 'postcopy-preempt', 'switchover-ack'] } + + ## + # @MigrationCapabilityStatus: +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch b/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch new file mode 100644 index 0000000..7c9748b --- /dev/null +++ b/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch @@ -0,0 +1,308 @@ +From e2c2910edf90186ca0d7d13c9943caa284e95ea9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 25 Apr 2023 21:15:14 -0400 +Subject: [PATCH 51/56] migration: Allow postcopy_ram_supported_by_host() to + report err +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [50/50] 08c44affc11c27ddf1aa7ce0dfacbaf5effb80cb (peterx/qemu-kvm) + +Instead of print it to STDERR, bring the error upwards so that it can be +reported via QMP responses. 
+ +E.g.: + +{ "execute": "migrate-set-capabilities" , + "arguments": { "capabilities": + [ { "capability": "postcopy-ram", "state": true } ] } } + +{ "error": + { "class": "GenericError", + "desc": "Postcopy is not supported: Host backend files need to be TMPFS + or HUGETLBFS only" } } + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 74c38cf7fd24c60e4f0a90585d17250478260877) +Signed-off-by: Peter Xu +--- + migration/options.c | 8 ++---- + migration/postcopy-ram.c | 60 +++++++++++++++++++++------------------- + migration/postcopy-ram.h | 3 +- + migration/savevm.c | 3 +- + 4 files changed, 39 insertions(+), 35 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 4701c75a4d..e51d667e14 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -302,6 +302,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + ++ ERRP_GUARD(); + #ifndef CONFIG_LIVE_BLOCK_MIGRATION + if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { + error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " +@@ -327,11 +328,8 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + */ + if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && + runstate_check(RUN_STATE_INMIGRATE) && +- !postcopy_ram_supported_by_host(mis)) { +- /* postcopy_ram_supported_by_host will have emitted a more +- * detailed message +- */ +- error_setg(errp, "Postcopy is not supported"); ++ !postcopy_ram_supported_by_host(mis, errp)) { ++ error_prepend(errp, "Postcopy is not supported: "); + return false; + } + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 0711500036..75aa276bb1 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -283,11 +283,13 @@ static bool request_ufd_features(int ufd, uint64_t features) + return true; + } + +-static bool ufd_check_and_apply(int ufd, 
MigrationIncomingState *mis) ++static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis, ++ Error **errp) + { + uint64_t asked_features = 0; + static uint64_t supported_features; + ++ ERRP_GUARD(); + /* + * it's not possible to + * request UFFD_API twice per one fd +@@ -295,7 +297,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + */ + if (!supported_features) { + if (!receive_ufd_features(&supported_features)) { +- error_report("%s failed", __func__); ++ error_setg(errp, "Userfault feature detection failed"); + return false; + } + } +@@ -317,8 +319,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + * userfault file descriptor + */ + if (!request_ufd_features(ufd, asked_features)) { +- error_report("%s failed: features %" PRIu64, __func__, +- asked_features); ++ error_setg(errp, "Failed features %" PRIu64, asked_features); + return false; + } + +@@ -329,7 +330,8 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; + #endif + if (!have_hp) { +- error_report("Userfault on this host does not support huge pages"); ++ error_setg(errp, ++ "Userfault on this host does not support huge pages"); + return false; + } + } +@@ -338,7 +340,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + + /* Callback from postcopy_ram_supported_by_host block iterator. 
+ */ +-static int test_ramblock_postcopiable(RAMBlock *rb) ++static int test_ramblock_postcopiable(RAMBlock *rb, Error **errp) + { + const char *block_name = qemu_ram_get_idstr(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); +@@ -346,16 +348,18 @@ static int test_ramblock_postcopiable(RAMBlock *rb) + QemuFsType fs; + + if (length % pagesize) { +- error_report("Postcopy requires RAM blocks to be a page size multiple," +- " block %s is 0x" RAM_ADDR_FMT " bytes with a " +- "page size of 0x%zx", block_name, length, pagesize); ++ error_setg(errp, ++ "Postcopy requires RAM blocks to be a page size multiple," ++ " block %s is 0x" RAM_ADDR_FMT " bytes with a " ++ "page size of 0x%zx", block_name, length, pagesize); + return 1; + } + + if (rb->fd >= 0) { + fs = qemu_fd_getfs(rb->fd); + if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { +- error_report("Host backend files need to be TMPFS or HUGETLBFS only"); ++ error_setg(errp, ++ "Host backend files need to be TMPFS or HUGETLBFS only"); + return 1; + } + } +@@ -368,7 +372,7 @@ static int test_ramblock_postcopiable(RAMBlock *rb) + * normally fine since if the postcopy succeeds it gets turned back on at the + * end. 
+ */ +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) + { + long pagesize = qemu_real_host_page_size(); + int ufd = -1; +@@ -377,29 +381,27 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + struct uffdio_register reg_struct; + struct uffdio_range range_struct; + uint64_t feature_mask; +- Error *local_err = NULL; + RAMBlock *block; + ++ ERRP_GUARD(); + if (qemu_target_page_size() > pagesize) { +- error_report("Target page size bigger than host page size"); ++ error_setg(errp, "Target page size bigger than host page size"); + goto out; + } + + ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { +- error_report("%s: userfaultfd not available: %s", __func__, +- strerror(errno)); ++ error_setg(errp, "Userfaultfd not available: %s", strerror(errno)); + goto out; + } + + /* Give devices a chance to object */ +- if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) { +- error_report_err(local_err); ++ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, errp)) { + goto out; + } + + /* Version and features check */ +- if (!ufd_check_and_apply(ufd, mis)) { ++ if (!ufd_check_and_apply(ufd, mis, errp)) { + goto out; + } + +@@ -417,7 +419,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + * affect in reality, or we can revisit. + */ + RAMBLOCK_FOREACH(block) { +- if (test_ramblock_postcopiable(block)) { ++ if (test_ramblock_postcopiable(block, errp)) { + goto out; + } + } +@@ -427,7 +429,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + * it was enabled. 
+ */ + if (munlockall()) { +- error_report("%s: munlockall: %s", __func__, strerror(errno)); ++ error_setg(errp, "munlockall() failed: %s", strerror(errno)); + goto out; + } + +@@ -439,8 +441,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (testarea == MAP_FAILED) { +- error_report("%s: Failed to map test area: %s", __func__, +- strerror(errno)); ++ error_setg(errp, "Failed to map test area: %s", strerror(errno)); + goto out; + } + g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize)); +@@ -450,14 +451,14 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; + + if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) { +- error_report("%s userfault register: %s", __func__, strerror(errno)); ++ error_setg(errp, "UFFDIO_REGISTER failed: %s", strerror(errno)); + goto out; + } + + range_struct.start = (uintptr_t)testarea; + range_struct.len = pagesize; + if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { +- error_report("%s userfault unregister: %s", __func__, strerror(errno)); ++ error_setg(errp, "UFFDIO_UNREGISTER failed: %s", strerror(errno)); + goto out; + } + +@@ -465,8 +466,8 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + (__u64)1 << _UFFDIO_COPY | + (__u64)1 << _UFFDIO_ZEROPAGE; + if ((reg_struct.ioctls & feature_mask) != feature_mask) { +- error_report("Missing userfault map features: %" PRIx64, +- (uint64_t)(~reg_struct.ioctls & feature_mask)); ++ error_setg(errp, "Missing userfault map features: %" PRIx64, ++ (uint64_t)(~reg_struct.ioctls & feature_mask)); + goto out; + } + +@@ -1188,6 +1189,8 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) + + int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + { ++ Error *local_err = NULL; ++ + /* Open the fd for the kernel to give us userfaults */ + mis->userfault_fd = uffd_open(O_CLOEXEC | 
O_NONBLOCK); + if (mis->userfault_fd == -1) { +@@ -1200,7 +1203,8 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + * Although the host check already tested the API, we need to + * do the check again as an ABI handshake on the new fd. + */ +- if (!ufd_check_and_apply(mis->userfault_fd, mis)) { ++ if (!ufd_check_and_apply(mis->userfault_fd, mis, &local_err)) { ++ error_report_err(local_err); + return -1; + } + +@@ -1360,7 +1364,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) + { + } + +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) + { + error_report("%s: No OS support", __func__); + return false; +diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h +index b4867a32d5..442ab89752 100644 +--- a/migration/postcopy-ram.h ++++ b/migration/postcopy-ram.h +@@ -14,7 +14,8 @@ + #define QEMU_POSTCOPY_RAM_H + + /* Return true if the host supports everything we need to do postcopy-ram */ +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, ++ Error **errp); + + /* + * Make all of RAM sensitive to accesses to areas that haven't yet been written +diff --git a/migration/savevm.c b/migration/savevm.c +index 9671211339..211eff3a8b 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1753,7 +1753,8 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + return -EINVAL; + } + +- if (!postcopy_ram_supported_by_host(mis)) { ++ if (!postcopy_ram_supported_by_host(mis, &local_err)) { ++ error_report_err(local_err); + postcopy_state_set(POSTCOPY_INCOMING_NONE); + return -1; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch b/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch new file mode 100644 index 0000000..d1620f0 --- /dev/null +++ 
b/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch @@ -0,0 +1,111 @@ +From 3691bb5f956e3c60dbf6de183011b31dbc7a7801 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 May 2023 15:52:12 -0500 +Subject: [PATCH 01/56] migration: Attempt disk reactivation in more failure + scenarios + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Acked-by: Kevin Wolf +RH-Commit: [1/1] 5999b747b314641259d3b8809033b057805eed3f (ebblake/centos-qemu-kvm) + +Commit fe904ea824 added a fail_inactivate label, which tries to +reactivate disks on the source after a failure while s->state == +MIGRATION_STATUS_ACTIVE, but didn't actually use the label if +qemu_savevm_state_complete_precopy() failed. This failure to +reactivate is also present in commit 6039dd5b1c (also covering the new +s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring +s->block_inactive is set more reliably). + +Consolidate the two labels back into one - no matter HOW migration is +failed, if there is any chance we can reach vm_start() after having +attempted inactivation, it is essential that we have tried to restart +disks before then. This also makes the cleanup more like +migrate_fd_cancel(). 
+ +Suggested-by: Kevin Wolf +Signed-off-by: Eric Blake +Message-Id: <20230502205212.134680-1-eblake@redhat.com> +Acked-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 6dab4c93ecfae48e2e67b984d1032c1e988d3005) +[eblake: downstream migrate_colo() => migrate_colo_enabled()] +Signed-off-by: Eric Blake +--- + migration/migration.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 08007cef4e..99f86bd6c2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3443,6 +3443,11 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { ++ /* ++ * Inactivate disks except in COLO, and track that we ++ * have done so in order to remember to reactivate ++ * them if migration fails or is cancelled. ++ */ + s->block_inactive = !migrate_colo_enabled(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, +@@ -3487,13 +3492,13 @@ static void migration_completion(MigrationState *s) + rp_error = await_return_path_close_on_source(s); + trace_migration_return_path_end_after(rp_error); + if (rp_error) { +- goto fail_invalidate; ++ goto fail; + } + } + + if (qemu_file_get_error(s->to_dst_file)) { + trace_migration_completion_file_err(); +- goto fail_invalidate; ++ goto fail; + } + + if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { +@@ -3507,26 +3512,25 @@ static void migration_completion(MigrationState *s) + + return; + +-fail_invalidate: +- /* If not doing postcopy, vm_start() will be called: let's regain +- * control on images. 
+- */ +- if (s->state == MIGRATION_STATUS_ACTIVE || +- s->state == MIGRATION_STATUS_DEVICE) { ++fail: ++ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || ++ s->state == MIGRATION_STATUS_DEVICE)) { ++ /* ++ * If not doing postcopy, vm_start() will be called: let's ++ * regain control on images. ++ */ + Error *local_err = NULL; + + qemu_mutex_lock_iothread(); + bdrv_activate_all(&local_err); + if (local_err) { + error_report_err(local_err); +- s->block_inactive = true; + } else { + s->block_inactive = false; + } + qemu_mutex_unlock_iothread(); + } + +-fail: + migrate_set_state(&s->state, current_active_state, + MIGRATION_STATUS_FAILED); + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_cap_set.patch b/SOURCES/kvm-migration-Create-migrate_cap_set.patch new file mode 100644 index 0000000..33268bb --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_cap_set.patch @@ -0,0 +1,93 @@ +From d772464e9a51a085e10864b2dc7ffd49991fc23b Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 21:02:42 +0100 +Subject: [PATCH 22/56] migration: Create migrate_cap_set() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [21/50] 5b12f04013cf2d374a869134bb67c938c789e24d (peterx/qemu-kvm) + +And remove the convoluted use of qmp_migrate_set_capabilities() to +enable disable MIGRATION_CAPABILITY_BLOCK. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9eb1109cfba5415dd0b0cb82e80fc5e42fe861b7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 34 ++++++++++++++++------------------ + 1 file changed, 16 insertions(+), 18 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index b745d829a4..18058fb597 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1912,25 +1912,24 @@ void migrate_set_state(int *state, int old_state, int new_state) + } + } + +-static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, +- bool state) ++static bool migrate_cap_set(int cap, bool value, Error **errp) + { +- MigrationCapabilityStatus *cap; +- +- cap = g_new0(MigrationCapabilityStatus, 1); +- cap->capability = index; +- cap->state = state; ++ MigrationState *s = migrate_get_current(); ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; + +- return cap; +-} ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return false; ++ } + +-void migrate_set_block_enabled(bool value, Error **errp) +-{ +- MigrationCapabilityStatusList *cap = NULL; ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ new_caps[cap] = value; + +- QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); +- qmp_migrate_set_capabilities(cap, errp); +- qapi_free_MigrationCapabilityStatusList(cap); ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return false; ++ } ++ s->capabilities[cap] = value; ++ return true; + } + + static void migrate_set_block_incremental(MigrationState *s, bool value) +@@ -1942,7 +1941,7 @@ static void block_cleanup_parameters(MigrationState *s) + { + if (s->must_remove_block_options) { + /* setting to false can never fail */ +- migrate_set_block_enabled(false, &error_abort); ++ migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort); + migrate_set_block_incremental(s, false); + s->must_remove_block_options = false; + 
} +@@ -2429,8 +2428,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + "current migration capabilities"); + return false; + } +- migrate_set_block_enabled(true, &local_err); +- if (local_err) { ++ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) { + error_propagate(errp, local_err); + return false; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch b/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch new file mode 100644 index 0000000..408d258 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch @@ -0,0 +1,84 @@ +From a17bee3c8ab48daa471ec53bed0e2cb0bb41fc76 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:04:55 +0100 +Subject: [PATCH 41/56] migration: Create migrate_checkpoint_delay() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [40/50] b972d3f12e49dc27aa78eb723ca6d0fac4d174d8 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit f94a858fa3e72ba954a338c01ae9fecc15fcce5c) +Signed-off-by: Peter Xu +--- + migration/colo.c | 5 ++--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + 3 files changed, 12 insertions(+), 3 deletions(-) + +diff --git a/migration/colo.c b/migration/colo.c +index 93b78c9270..07bfa21fea 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -576,7 +576,7 @@ static void colo_process_checkpoint(MigrationState *s) + trace_colo_vm_state_change("stop", "run"); + + timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + +- s->parameters.x_checkpoint_delay); ++ migrate_checkpoint_delay()); + + while (s->state == MIGRATION_STATUS_COLO) { + if 
(failover_get_state() != FAILOVER_STATUS_NONE) { +@@ -651,8 +651,7 @@ void colo_checkpoint_notify(void *opaque) + + qemu_event_set(&s->colo_checkpoint_event); + s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); +- next_notify_time = s->colo_checkpoint_time + +- s->parameters.x_checkpoint_delay; ++ next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); + timer_mod(s->colo_delay_timer, next_notify_time); + } + +diff --git a/migration/options.c b/migration/options.c +index b9f3815f7e..0e102e5700 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -472,6 +472,15 @@ bool migrate_block_incremental(void) + return s->parameters.block_incremental; + } + ++uint32_t migrate_checkpoint_delay(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.x_checkpoint_delay; ++} ++ + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index aa54443353..adc2879bbb 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -46,6 +46,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); + /* parameters */ + + bool migrate_block_incremental(void); ++uint32_t migrate_checkpoint_delay(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch new file mode 100644 index 0000000..65bad3c --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch @@ -0,0 +1,75 @@ +From 7ff430e011780dad00e5ebaad0318c5fa3aec102 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:20:49 +0100 +Subject: [PATCH 45/56] migration: Create migrate_cpu_throttle_increment() + function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [44/50] aec990a106a0347b265f5c056a516e0b91e8183c (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9605c2ac282c565bb00b5f344217161bef29eff8) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index f7fb6999f7..31435d2b45 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) + return s->parameters.compress_wait_thread; + } + ++uint8_t migrate_cpu_throttle_increment(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_increment; ++} ++ + uint8_t migrate_cpu_throttle_initial(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index fd8b91d767..49b29bdafd 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); ++uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5e855d5c22..5645745a42 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -713,7 +713,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + { + MigrationState *s = migrate_get_current(); + uint64_t pct_initial = migrate_cpu_throttle_initial(); +- uint64_t pct_increment = s->parameters.cpu_throttle_increment; ++ uint64_t 
pct_increment = migrate_cpu_throttle_increment(); + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; + int pct_max = migrate_max_cpu_throttle(); + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch new file mode 100644 index 0000000..aab2013 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch @@ -0,0 +1,75 @@ +From fdc2f14bfb3ef8897310a7db63287a9bab1fb858 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:22:44 +0100 +Subject: [PATCH 44/56] migration: Create migrate_cpu_throttle_initial() to + option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [43/50] e0e0db7218f28aefd4bd022edbaec236e2030cb1 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 2a8ec38082f8098f2693bb3632175453c0c84a51) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index 418aafac64..f7fb6999f7 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) + return s->parameters.compress_wait_thread; + } + ++uint8_t migrate_cpu_throttle_initial(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_initial; ++} ++ + int migrate_decompress_threads(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 72b1a320b7..fd8b91d767 100644 +--- a/migration/options.h ++++ 
b/migration/options.h +@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); ++uint8_t migrate_cpu_throttle_initial(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5c786513ef..5e855d5c22 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -712,7 +712,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t bytes_dirty_threshold) + { + MigrationState *s = migrate_get_current(); +- uint64_t pct_initial = s->parameters.cpu_throttle_initial; ++ uint64_t pct_initial = migrate_cpu_throttle_initial(); + uint64_t pct_increment = s->parameters.cpu_throttle_increment; + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; + int pct_max = migrate_max_cpu_throttle(); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch new file mode 100644 index 0000000..e36f003 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch @@ -0,0 +1,78 @@ +From b88c51c4b02639e28da73143b1da7bd3d6706ce5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:29:51 +0100 +Subject: [PATCH 46/56] migration: Create migrate_cpu_throttle_tailslow() + function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [45/50] e93e96392405c60f75abbf288e4fddb191bbc996 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 
873f674c559e3162a6e6e92994301d400c5cc873) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 3 +-- + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 31435d2b45..615534c151 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -527,6 +527,15 @@ uint8_t migrate_cpu_throttle_initial(void) + return s->parameters.cpu_throttle_initial; + } + ++bool migrate_cpu_throttle_tailslow(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_tailslow; ++} ++ + int migrate_decompress_threads(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 49b29bdafd..99f6bbd7a1 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -52,6 +52,7 @@ int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); ++bool migrate_cpu_throttle_tailslow(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5645745a42..01356f60a4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -711,10 +711,9 @@ static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f, + static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t bytes_dirty_threshold) + { +- MigrationState *s = migrate_get_current(); + uint64_t pct_initial = migrate_cpu_throttle_initial(); + uint64_t pct_increment = migrate_cpu_throttle_increment(); +- bool pct_tailslow = s->parameters.cpu_throttle_tailslow; ++ bool pct_tailslow = migrate_cpu_throttle_tailslow(); + int pct_max = migrate_max_cpu_throttle(); + + uint64_t throttle_now = cpu_throttle_get_percentage(); +-- +2.39.1 + diff --git 
a/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch b/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch new file mode 100644 index 0000000..ba1d34c --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch @@ -0,0 +1,232 @@ +From b6228b3122f5c1f220f92042277ab1bfbb5ba086 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 11:00:12 +0100 +Subject: [PATCH 48/56] migration: Create migrate_max_bandwidth() function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [47/50] 3874656f70cb9c2a30f4d63e146539480d422326 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9c894df3a37d675652390f7dbbe2f65b7bad7efa) +Signed-off-by: Peter Xu +--- + migration/migration.c | 70 +------------------------------------- + migration/options.c | 79 +++++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 1 + + 3 files changed, 81 insertions(+), 69 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 46a5ea4d42..c2e109329d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -886,74 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) + migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); + } + +-MigrationParameters *qmp_query_migrate_parameters(Error **errp) +-{ +- MigrationParameters *params; +- MigrationState *s = migrate_get_current(); +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- params = g_malloc0(sizeof(*params)); +- params->has_compress_level = true; +- params->compress_level = s->parameters.compress_level; +- params->has_compress_threads = true; +- 
params->compress_threads = s->parameters.compress_threads; +- params->has_compress_wait_thread = true; +- params->compress_wait_thread = s->parameters.compress_wait_thread; +- params->has_decompress_threads = true; +- params->decompress_threads = s->parameters.decompress_threads; +- params->has_throttle_trigger_threshold = true; +- params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; +- params->has_cpu_throttle_initial = true; +- params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; +- params->has_cpu_throttle_increment = true; +- params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; +- params->has_cpu_throttle_tailslow = true; +- params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; +- params->tls_creds = g_strdup(s->parameters.tls_creds); +- params->tls_hostname = g_strdup(s->parameters.tls_hostname); +- params->tls_authz = g_strdup(s->parameters.tls_authz ? +- s->parameters.tls_authz : ""); +- params->has_max_bandwidth = true; +- params->max_bandwidth = s->parameters.max_bandwidth; +- params->has_downtime_limit = true; +- params->downtime_limit = s->parameters.downtime_limit; +- params->has_x_checkpoint_delay = true; +- params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; +- params->has_block_incremental = true; +- params->block_incremental = s->parameters.block_incremental; +- params->has_multifd_channels = true; +- params->multifd_channels = s->parameters.multifd_channels; +- params->has_multifd_compression = true; +- params->multifd_compression = s->parameters.multifd_compression; +- params->has_multifd_zlib_level = true; +- params->multifd_zlib_level = s->parameters.multifd_zlib_level; +- params->has_multifd_zstd_level = true; +- params->multifd_zstd_level = s->parameters.multifd_zstd_level; +- params->has_xbzrle_cache_size = true; +- params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; +- params->has_max_postcopy_bandwidth = true; +- params->max_postcopy_bandwidth = 
s->parameters.max_postcopy_bandwidth; +- params->has_max_cpu_throttle = true; +- params->max_cpu_throttle = s->parameters.max_cpu_throttle; +- params->has_announce_initial = true; +- params->announce_initial = s->parameters.announce_initial; +- params->has_announce_max = true; +- params->announce_max = s->parameters.announce_max; +- params->has_announce_rounds = true; +- params->announce_rounds = s->parameters.announce_rounds; +- params->has_announce_step = true; +- params->announce_step = s->parameters.announce_step; +- +- if (s->parameters.has_block_bitmap_mapping) { +- params->has_block_bitmap_mapping = true; +- params->block_bitmap_mapping = +- QAPI_CLONE(BitmapMigrationNodeAliasList, +- s->parameters.block_bitmap_mapping); +- } +- +- return params; +-} +- + /* + * Return true if we're already in the middle of a migration + * (i.e. any of the active or setup states) +@@ -3775,7 +3707,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + XFER_LIMIT_RATIO; + } else { + /* This is a fresh new migration */ +- rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; ++ rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO; + + /* Notify before starting migration thread */ + notifier_list_notify(&migration_state_notifiers, s); +diff --git a/migration/options.c b/migration/options.c +index 8bd2d949ae..8e8753d9be 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,8 +12,10 @@ + */ + + #include "qemu/osdep.h" ++#include "qapi/clone-visitor.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" ++#include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" + #include "migration/misc.h" +@@ -562,6 +564,15 @@ uint8_t migrate_max_cpu_throttle(void) + return s->parameters.max_cpu_throttle; + } + ++uint64_t migrate_max_bandwidth(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_bandwidth; ++} ++ + int64_t 
migrate_max_postcopy_bandwidth(void) + { + MigrationState *s; +@@ -641,3 +652,71 @@ AnnounceParameters *migrate_announce_params(void) + + return ≈ + } ++ ++MigrationParameters *qmp_query_migrate_parameters(Error **errp) ++{ ++ MigrationParameters *params; ++ MigrationState *s = migrate_get_current(); ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ params = g_malloc0(sizeof(*params)); ++ params->has_compress_level = true; ++ params->compress_level = s->parameters.compress_level; ++ params->has_compress_threads = true; ++ params->compress_threads = s->parameters.compress_threads; ++ params->has_compress_wait_thread = true; ++ params->compress_wait_thread = s->parameters.compress_wait_thread; ++ params->has_decompress_threads = true; ++ params->decompress_threads = s->parameters.decompress_threads; ++ params->has_throttle_trigger_threshold = true; ++ params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; ++ params->has_cpu_throttle_initial = true; ++ params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; ++ params->has_cpu_throttle_increment = true; ++ params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; ++ params->has_cpu_throttle_tailslow = true; ++ params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; ++ params->tls_creds = g_strdup(s->parameters.tls_creds); ++ params->tls_hostname = g_strdup(s->parameters.tls_hostname); ++ params->tls_authz = g_strdup(s->parameters.tls_authz ? 
++ s->parameters.tls_authz : ""); ++ params->has_max_bandwidth = true; ++ params->max_bandwidth = s->parameters.max_bandwidth; ++ params->has_downtime_limit = true; ++ params->downtime_limit = s->parameters.downtime_limit; ++ params->has_x_checkpoint_delay = true; ++ params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; ++ params->has_block_incremental = true; ++ params->block_incremental = s->parameters.block_incremental; ++ params->has_multifd_channels = true; ++ params->multifd_channels = s->parameters.multifd_channels; ++ params->has_multifd_compression = true; ++ params->multifd_compression = s->parameters.multifd_compression; ++ params->has_multifd_zlib_level = true; ++ params->multifd_zlib_level = s->parameters.multifd_zlib_level; ++ params->has_multifd_zstd_level = true; ++ params->multifd_zstd_level = s->parameters.multifd_zstd_level; ++ params->has_xbzrle_cache_size = true; ++ params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; ++ params->has_max_postcopy_bandwidth = true; ++ params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; ++ params->has_max_cpu_throttle = true; ++ params->max_cpu_throttle = s->parameters.max_cpu_throttle; ++ params->has_announce_initial = true; ++ params->announce_initial = s->parameters.announce_initial; ++ params->has_announce_max = true; ++ params->announce_max = s->parameters.announce_max; ++ params->has_announce_rounds = true; ++ params->announce_rounds = s->parameters.announce_rounds; ++ params->has_announce_step = true; ++ params->announce_step = s->parameters.announce_step; ++ ++ if (s->parameters.has_block_bitmap_mapping) { ++ params->has_block_bitmap_mapping = true; ++ params->block_bitmap_mapping = ++ QAPI_CLONE(BitmapMigrationNodeAliasList, ++ s->parameters.block_bitmap_mapping); ++ } ++ ++ return params; ++} +diff --git a/migration/options.h b/migration/options.h +index 093bc907a1..1b78fa9f3d 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -64,6 +64,7 @@ uint8_t 
migrate_cpu_throttle_initial(void); + bool migrate_cpu_throttle_tailslow(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); ++uint64_t migrate_max_bandwidth(void); + int64_t migrate_max_postcopy_bandwidth(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch b/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch new file mode 100644 index 0000000..6628b80 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch @@ -0,0 +1,88 @@ +From f0d4e34b00f66d2336b755a34a1ba226571641c4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:13:01 +0100 +Subject: [PATCH 42/56] migration: Create migrate_max_cpu_throttle() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [41/50] fc7537c06d8e1f53d7bb552661f6ddb0133a978d (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 24155bd0520035d5148c0af5b925932c4d8064a8) +Signed-off-by: Peter Xu +--- + migration/migration.h | 2 -- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 4 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.h b/migration/migration.h +index 86051af132..3ae938b19c 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -451,8 +451,6 @@ bool migrate_postcopy(void); + + int migrate_use_tls(void); + +-int migrate_max_cpu_throttle(void); +- + uint64_t ram_get_total_transferred_pages(void); + + /* Sending on the return path - generic and then for each message type */ +diff --git 
a/migration/options.c b/migration/options.c +index 0e102e5700..2cb04fbbd1 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -517,6 +517,15 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + ++uint8_t migrate_max_cpu_throttle(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_cpu_throttle; ++} ++ + int64_t migrate_max_postcopy_bandwidth(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index adc2879bbb..72b1a320b7 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -51,6 +51,7 @@ int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); ++uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); +diff --git a/migration/ram.c b/migration/ram.c +index e82cee97c3..5c786513ef 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -715,7 +715,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t pct_initial = s->parameters.cpu_throttle_initial; + uint64_t pct_increment = s->parameters.cpu_throttle_increment; + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; +- int pct_max = s->parameters.max_cpu_throttle; ++ int pct_max = migrate_max_cpu_throttle(); + + uint64_t throttle_now = cpu_throttle_get_percentage(); + uint64_t cpu_now, cpu_ideal, throttle_inc; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch b/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch new file mode 100644 index 0000000..c7799f1 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch @@ -0,0 +1,95 @@ +From e4ef0f2cee6cdf2cf4bd225ac9e610f41d66dfcb Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 
22:41:55 +0100 +Subject: [PATCH 32/56] migration: Create migrate_rdma_pin_all() function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [31/50] 206d96d47d9ee73ddc89dd01186560bf62ea5295 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy + +--- + +Fixed missing space after comma (fabiano) + +(cherry picked from commit 17cba690cdd42108369fafe6b07bff09872fbea6) +Signed-off-by: Peter Xu +--- + migration/options.c | 7 +++++++ + migration/options.h | 1 + + migration/rdma.c | 6 +++--- + 3 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 2003e413da..9c9b8e5863 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -138,6 +138,13 @@ bool migrate_postcopy_ram(void) + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } + ++bool migrate_rdma_pin_all(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]; ++} ++ + bool migrate_release_ram(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 316efd1063..25c002b37a 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -30,6 +30,7 @@ bool migrate_pause_before_switchover(void); + bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); ++bool migrate_rdma_pin_all(void); + bool migrate_release_ram(void); + bool migrate_return_path(void); + bool migrate_validate_uuid(void); +diff --git a/migration/rdma.c b/migration/rdma.c +index bf55e2f163..0af5e944f0 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -35,6 +35,7 @@ + #include + #include "trace.h" + #include 
"qom/object.h" ++#include "options.h" + #include + + /* +@@ -4178,8 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, + goto err; + } + +- ret = qemu_rdma_source_init(rdma, +- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ ret = qemu_rdma_source_init(rdma, migrate_rdma_pin_all(), errp); + + if (ret) { + goto err; +@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma_return_path, +- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ migrate_rdma_pin_all(), errp); + + if (ret) { + goto return_path_err; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch b/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch new file mode 100644 index 0000000..5fc1072 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch @@ -0,0 +1,75 @@ +From 27862b9d31da6447b60f185cdad95764018c6bc6 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:59:13 +0100 +Subject: [PATCH 40/56] migration: Create migrate_throttle_trigger_threshold() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [39/50] b8af9080c49be3d38bd2784d61289be89c03db3e (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 6499efdb16e5c1288b4c8390d3bf68b313329b8b) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 3 +-- + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 2b6d88b4b9..b9f3815f7e 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -554,6 +554,15 @@ int 
migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + ++uint8_t migrate_throttle_trigger_threshold(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.throttle_trigger_threshold; ++} ++ + uint64_t migrate_xbzrle_cache_size(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 96d5a8e6e4..aa54443353 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -55,6 +55,7 @@ int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); ++uint8_t migrate_throttle_trigger_threshold(void); + uint64_t migrate_xbzrle_cache_size(void); + + #endif +diff --git a/migration/ram.c b/migration/ram.c +index 4576d0d849..e82cee97c3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1178,8 +1178,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + + static void migration_trigger_throttle(RAMState *rs) + { +- MigrationState *s = migrate_get_current(); +- uint64_t threshold = s->parameters.throttle_trigger_threshold; ++ uint64_t threshold = migrate_throttle_trigger_threshold(); + uint64_t bytes_xfer_period = + stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; + uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-options.c.patch b/SOURCES/kvm-migration-Create-options.c.patch new file mode 100644 index 0000000..ea60202 --- /dev/null +++ b/SOURCES/kvm-migration-Create-options.c.patch @@ -0,0 +1,524 @@ +From 282634a835f4711c8b501dd76c344058bc399fbd Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 21:18:45 +0100 +Subject: [PATCH 23/56] migration: Create options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for 
postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [22/50] 10c9be528b9fcfae93f1a12fcd09db1a69e58f64 (peterx/qemu-kvm) + +We move there all capabilities helpers from migration.c. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert + +--- + +Following David's advice: +- looked through the history, capabilities are newer than 2012, so we + can remove that bit of the header. +- This part is posterior to Anthony. + Original Author is Orit. Once there, + I put myself. Peter Xu also did quite a bit of work here. + Anyone else wants/needs to be there? I didn't search too hard + because nobody asked before to be added. + +What do you think? + +(cherry picked from commit 1f0776f1c03312aad5d6a5f98871240bc3af01e5) +Signed-off-by: Peter Xu +--- + hw/virtio/virtio-balloon.c | 1 + + migration/block-dirty-bitmap.c | 1 + + migration/block.c | 1 + + migration/colo.c | 1 + + migration/meson.build | 1 + + migration/migration.c | 109 +---------------------------- + migration/migration.h | 12 ---- + migration/options.c | 124 +++++++++++++++++++++++++++++++++ + migration/options.h | 32 +++++++++ + migration/postcopy-ram.c | 1 + + migration/ram.c | 1 + + migration/savevm.c | 1 + + migration/socket.c | 1 + + 13 files changed, 166 insertions(+), 120 deletions(-) + create mode 100644 migration/options.c + create mode 100644 migration/options.h + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 746f07c4d2..43092aa634 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -32,6 +32,7 @@ + #include "qemu/error-report.h" + #include "migration/misc.h" + #include "migration/migration.h" ++#include "migration/options.h" + + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio-access.h" +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index fe73aa94b1..a6ffae0002 100644 +---
a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -79,6 +79,7 @@ + #include "qapi/qapi-visit-migration.h" + #include "qapi/clone-visitor.h" + #include "trace.h" ++#include "options.h" + + #define CHUNK_SIZE (1 << 10) + +diff --git a/migration/block.c b/migration/block.c +index b2497bbd32..4b167fa5cf 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -28,6 +28,7 @@ + #include "migration/vmstate.h" + #include "sysemu/block-backend.h" + #include "trace.h" ++#include "options.h" + + #define BLK_MIG_BLOCK_SIZE (1ULL << 20) + #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS) +diff --git a/migration/colo.c b/migration/colo.c +index 0716e64689..93b78c9270 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -36,6 +36,7 @@ + #include "sysemu/cpus.h" + #include "sysemu/runstate.h" + #include "net/filter.h" ++#include "options.h" + + static bool vmstate_loading; + static Notifier packets_compare_notifier; +diff --git a/migration/meson.build b/migration/meson.build +index 0d1bb9f96e..480ff6854a 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -22,6 +22,7 @@ softmmu_ss.add(files( + 'migration.c', + 'multifd.c', + 'multifd-zlib.c', ++ 'options.c', + 'postcopy-ram.c', + 'savevm.c', + 'socket.c', +diff --git a/migration/migration.c b/migration/migration.c +index 18058fb597..66ea55be06 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -63,6 +63,7 @@ + #include "sysemu/cpus.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" ++#include "options.h" + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +@@ -357,15 +358,6 @@ static void migrate_generate_event(int new_state) + } + } + +-static bool migrate_late_block_activate(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; +-} +- + /* + * Send a message on the return channel back to the source + * of the 
migration. +@@ -2525,56 +2517,11 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-bool migrate_release_ram(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; +-} +- +-bool migrate_postcopy_ram(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; +-} +- + bool migrate_postcopy(void) + { + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-bool migrate_auto_converge(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; +-} +- +-bool migrate_zero_blocks(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; +-} +- +-bool migrate_postcopy_blocktime(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; +-} +- + bool migrate_use_compression(void) + { + MigrationState *s; +@@ -2620,33 +2567,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_dirty_bitmaps(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; +-} +- +-bool migrate_ignore_shared(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; +-} +- +-bool migrate_validate_uuid(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; +-} +- + bool migrate_use_events(void) + { + MigrationState *s; +@@ -2665,15 +2585,6 @@ bool migrate_use_multifd(void) + return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + +-bool migrate_pause_before_switchover(void) +-{ +- MigrationState *s; +- +- s 
= migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; +-} +- + int migrate_multifd_channels(void) + { + MigrationState *s; +@@ -2785,24 +2696,6 @@ bool migrate_use_block_incremental(void) + return s->parameters.block_incremental; + } + +-bool migrate_background_snapshot(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; +-} +- +-bool migrate_postcopy_preempt(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index 04e0860b4e..a25fed6ef0 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,16 +449,7 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-bool migrate_release_ram(void); +-bool migrate_postcopy_ram(void); +-bool migrate_zero_blocks(void); +-bool migrate_dirty_bitmaps(void); +-bool migrate_ignore_shared(void); +-bool migrate_validate_uuid(void); +- +-bool migrate_auto_converge(void); + bool migrate_use_multifd(void); +-bool migrate_pause_before_switchover(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); +@@ -487,9 +478,6 @@ int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); + bool migrate_use_events(void); +-bool migrate_postcopy_blocktime(void); +-bool migrate_background_snapshot(void); +-bool migrate_postcopy_preempt(void); + + /* Sending on the return path - generic and then for each message type */ + void migrate_send_rp_shut(MigrationIncomingState *mis, +diff --git a/migration/options.c b/migration/options.c +new file mode 100644 +index 0000000000..88a9a45913 +--- 
/dev/null ++++ b/migration/options.c +@@ -0,0 +1,124 @@ ++/* ++ * QEMU migration capabilities ++ * ++ * Copyright (c) 2012-2023 Red Hat Inc ++ * ++ * Authors: ++ * Orit Wasserman ++ * Juan Quintela ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "migration.h" ++#include "options.h" ++ ++bool migrate_auto_converge(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; ++} ++ ++bool migrate_background_snapshot(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; ++} ++ ++bool migrate_dirty_bitmaps(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; ++} ++ ++bool migrate_ignore_shared(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; ++} ++ ++bool migrate_late_block_activate(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; ++} ++ ++bool migrate_pause_before_switchover(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; ++} ++ ++bool migrate_postcopy_blocktime(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; ++} ++ ++bool migrate_postcopy_preempt(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; ++} ++ ++bool migrate_postcopy_ram(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; ++} ++ ++bool 
migrate_release_ram(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; ++} ++ ++bool migrate_validate_uuid(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; ++} ++ ++bool migrate_zero_blocks(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; ++} +diff --git a/migration/options.h b/migration/options.h +new file mode 100644 +index 0000000000..0dfa0af245 +--- /dev/null ++++ b/migration/options.h +@@ -0,0 +1,32 @@ ++/* ++ * QEMU migration capabilities ++ * ++ * Copyright (c) 2012-2023 Red Hat Inc ++ * ++ * Authors: ++ * Orit Wasserman ++ * Juan Quintela ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#ifndef QEMU_MIGRATION_OPTIONS_H ++#define QEMU_MIGRATION_OPTIONS_H ++ ++/* capabilities */ ++ ++bool migrate_auto_converge(void); ++bool migrate_background_snapshot(void); ++bool migrate_dirty_bitmaps(void); ++bool migrate_ignore_shared(void); ++bool migrate_late_block_activate(void); ++bool migrate_pause_before_switchover(void); ++bool migrate_postcopy_blocktime(void); ++bool migrate_postcopy_preempt(void); ++bool migrate_postcopy_ram(void); ++bool migrate_release_ram(void); ++bool migrate_validate_uuid(void); ++bool migrate_zero_blocks(void); ++ ++#endif +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index d7b48dd920..0711500036 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -37,6 +37,7 @@ + #include "tls.h" + #include "qemu/userfaultfd.h" + #include "qemu/mmap-alloc.h" ++#include "options.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +diff --git a/migration/ram.c b/migration/ram.c +index 229714045a..912ccd89fa 100644 +--- a/migration/ram.c ++++ 
b/migration/ram.c +@@ -57,6 +57,7 @@ + #include "qemu/iov.h" + #include "multifd.h" + #include "sysemu/runstate.h" ++#include "options.h" + + #include "hw/boards.h" /* for machine_dump_guest_core() */ + +diff --git a/migration/savevm.c b/migration/savevm.c +index 589ef926ab..ebcf571e37 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -67,6 +67,7 @@ + #include "qemu/yank.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" ++#include "options.h" + + const unsigned int postcopy_ram_discard_version; + +diff --git a/migration/socket.c b/migration/socket.c +index e6fdf3c5e1..ebf9ac41af 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -27,6 +27,7 @@ + #include "io/net-listener.h" + #include "trace.h" + #include "postcopy-ram.h" ++#include "options.h" + + struct SocketOutgoingArgs { + SocketAddress *saddr; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch b/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch new file mode 100644 index 0000000..e08e5df --- /dev/null +++ b/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch @@ -0,0 +1,56 @@ +From bbe565f7d3b7fe46971e020e9bd8e79dc9ffa69c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 12/37] migration: Enable switchover ack capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/28] c4a7d7d26a97181c9516d133a6610bfa5dcb1d16 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 538ef4fe2f72 +Author: Avihai Horon +Date: Wed Jun 21 14:11:56 2023 +0300 + + migration: Enable switchover ack capability + + Now that switchover ack logic has been implemented, enable the + capability. 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Juan Quintela + Reviewed-by: Peter Xu + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index c3df6c6dde..ccd7ef3907 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -547,10 +547,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + "'return-path'"); + return false; + } +- +- /* Disable this capability until it's implemented */ +- error_setg(errp, "'switchover-ack' is not implemented yet"); +- return false; + } + + return true; +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch b/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch new file mode 100644 index 0000000..26c8437 --- /dev/null +++ b/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch @@ -0,0 +1,116 @@ +From 2aac64623d8d2d06d248c1bcc71aa13572fc843c Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 14 Apr 2023 10:33:58 -0500 +Subject: [PATCH 1/2] migration: Handle block device inactivation failures + better + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [1/2] 5ae143c9234f6eee9fc5154944172bcd56975b36 (ebblake/centos-qemu-kvm) + +Consider what happens when performing a migration between two host +machines connected to an NFS server serving multiple block devices to +the guest, when the NFS server becomes unavailable. The migration +attempts to inactivate all block devices on the source (a necessary +step before the destination can take over); but if the NFS server is +non-responsive, the attempt to inactivate can itself fail. 
When that +happens, the destination fails to get the migrated guest (good, +because the source wasn't able to flush everything properly): + + (qemu) qemu-kvm: load of migration failed: Input/output error + +at which point, our only hope for the guest is for the source to take +back control. With the current code base, the host outputs a message, but then appears to resume: + + (qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1) + + (src qemu)info status + VM status: running + +but a second migration attempt now asserts: + + (src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed. + +Whether the guest is recoverable on the source after the first failure +is debatable, but what we do not want is to have qemu itself fail due +to an assertion. It looks like the problem is as follows: + +In migration.c:migration_completion(), the source sets 'inactivate' to +true (since COLO is not enabled), then tries +savevm.c:qemu_savevm_state_complete_precopy() with a request to +inactivate block devices. In turn, this calls +block.c:bdrv_inactivate_all(), which fails when flushing runs up +against the non-responsive NFS server. With savevm failing, we are +now left in a state where some, but not all, of the block devices have +been inactivated; but migration_completion() then jumps to 'fail' +rather than 'fail_invalidate' and skips an attempt to reclaim those +disks by calling bdrv_activate_all(). Even if we do attempt to +reclaim disks, we aren't taking note of failure there, either.
+ +Thus, we have reached a state where the migration engine has forgotten +all state about whether a block device is inactive, because we did not +set s->block_inactive in enough places; so migration allows the source +to reach vm_start() and resume execution, violating the block layer +invariant that the guest CPUs should not be restarted while a device +is inactive. Note that the code in migration.c:migrate_fd_cancel() +will also try to reactivate all block devices if s->block_inactive was +set, but because we failed to set that flag after the first failure, +the source assumes it has reclaimed all devices, even though it still +has remaining inactivated devices and does not try again. Normally, +qmp_cont() will also try to reactivate all disks (or correctly fail if +the disks are not reclaimable because NFS is not yet back up), but the +auto-resumption of the source after a migration failure does not go +through qmp_cont(). And because we have left the block layer in an +inconsistent state with devices still inactivated, the later migration +attempt is hitting the assertion failure. + +Since it is important to not resume the source with inactive disks, +this patch marks s->block_inactive before attempting inactivation, +rather than after succeeding, in order to prevent any vm_start() until +it has successfully reactivated all devices. 
+ +See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982 + +Signed-off-by: Eric Blake +Reviewed-by: Juan Quintela +Acked-by: Lukas Straub +Tested-by: Lukas Straub +Signed-off-by: Juan Quintela +(cherry picked from commit 403d18ae384239876764bbfa111d6cc5dcb673d1) +--- + migration/migration.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index bda4789193..cb0d42c061 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3444,13 +3444,11 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { ++ s->block_inactive = inactivate; + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + inactivate); + } +- if (inactivate && ret >= 0) { +- s->block_inactive = true; +- } + } + qemu_mutex_unlock_iothread(); + +@@ -3522,6 +3520,7 @@ fail_invalidate: + bdrv_activate_all(&local_err); + if (local_err) { + error_report_err(local_err); ++ s->block_inactive = true; + } else { + s->block_inactive = false; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch b/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch new file mode 100644 index 0000000..49b9f12 --- /dev/null +++ b/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch @@ -0,0 +1,339 @@ +From 387c39f198d94f600be525e363edc7ca916dc261 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 11/37] migration: Implement switchover ack logic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/28] 853e1978f3b9f87942863bba894a0ed908bde6b1 (clegoate/qemu-kvm-c9s) + +Bugzilla: 
https://bugzilla.redhat.com/2192818 + +commit 1b4adb10f898 +Author: Avihai Horon +Date: Wed Jun 21 14:11:55 2023 +0300 + + migration: Implement switchover ack logic + + Implement switchover ack logic. This prevents the source from stopping + the VM and completing the migration until an ACK is received from the + destination that it's OK to do so. + + To achieve this, a new SaveVMHandlers handler switchover_ack_needed() + and a new return path message MIG_RP_MSG_SWITCHOVER_ACK are added. + + The switchover_ack_needed() handler is called during migration setup in + the destination to check if switchover ack is used by the migrated + device. + + When switchover is approved by all migrated devices in the destination + that support this capability, the MIG_RP_MSG_SWITCHOVER_ACK return path + message is sent to the source to notify it that it's OK to do + switchover. + + Signed-off-by: Avihai Horon + Reviewed-by: Peter Xu + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c + context changes due to commit f4584076fc31 ("migration: switch + from .vm_was_running to .vm_old_state") + +Signed-off-by: Cédric Le Goater +--- + include/migration/register.h | 2 ++ + migration/migration.c | 32 +++++++++++++++++++-- + migration/migration.h | 14 ++++++++++ + migration/savevm.c | 54 ++++++++++++++++++++++++++++++++++++ + migration/savevm.h | 1 + + migration/trace-events | 3 ++ + 6 files changed, 104 insertions(+), 2 deletions(-) + +diff --git a/include/migration/register.h b/include/migration/register.h +index a8dfd8fefd..90914f32f5 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -71,6 +71,8 @@ typedef struct SaveVMHandlers { + int (*load_cleanup)(void *opaque); + /* Called when postcopy migration wants to resume from failure */ + int (*resume_prepare)(MigrationState *s, void *opaque); ++ /* Checks if switchover ack should be used. 
Called only in dest */ ++ bool (*switchover_ack_needed)(void *opaque); + } SaveVMHandlers; + + int register_savevm_live(const char *idstr, +diff --git a/migration/migration.c b/migration/migration.c +index 1ac5f19bc2..9bf1caee6c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -76,6 +76,7 @@ enum mig_rp_message_type { + MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ + MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ + MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ ++ MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */ + + MIG_RP_MSG_MAX + }; +@@ -756,6 +757,11 @@ bool migration_has_all_channels(void) + return true; + } + ++int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) ++{ ++ return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL); ++} ++ + /* + * Send a 'SHUT' message on the return channel with the given value + * to indicate that we've finished with the RP. Non-0 value indicates +@@ -1415,6 +1421,7 @@ void migrate_init(MigrationState *s) + s->vm_was_running = false; + s->iteration_initial_bytes = 0; + s->threshold_size = 0; ++ s->switchover_acked = false; + } + + int migrate_add_blocker_internal(Error *reason, Error **errp) +@@ -1731,6 +1738,7 @@ static struct rp_cmd_args { + [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, + [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, + [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, ++ [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, + [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, + }; + +@@ -1969,6 +1977,11 @@ retry: + } + break; + ++ case MIG_RP_MSG_SWITCHOVER_ACK: ++ ms->switchover_acked = true; ++ trace_source_return_path_thread_switchover_acked(); ++ break; ++ + default: + break; + } +@@ -2720,6 +2733,20 @@ static void migration_update_counters(MigrationState *s, + bandwidth, s->threshold_size); + } + ++static bool 
migration_can_switchover(MigrationState *s) ++{ ++ if (!migrate_switchover_ack()) { ++ return true; ++ } ++ ++ /* No reason to wait for switchover ACK if VM is stopped */ ++ if (!runstate_is_running()) { ++ return true; ++ } ++ ++ return s->switchover_acked; ++} ++ + /* Migration thread iteration status */ + typedef enum { + MIG_ITERATE_RESUME, /* Resume current iteration */ +@@ -2735,6 +2762,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) + { + uint64_t must_precopy, can_postcopy; + bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; ++ bool can_switchover = migration_can_switchover(s); + + qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); + uint64_t pending_size = must_precopy + can_postcopy; +@@ -2747,14 +2775,14 @@ static MigIterateState migration_iteration_run(MigrationState *s) + trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy); + } + +- if (!pending_size || pending_size < s->threshold_size) { ++ if ((!pending_size || pending_size < s->threshold_size) && can_switchover) { + trace_migration_thread_low_pending(pending_size); + migration_completion(s); + return MIG_ITERATE_BREAK; + } + + /* Still a significant amount to transfer */ +- if (!in_postcopy && must_precopy <= s->threshold_size && ++ if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover && + qatomic_read(&s->start_postcopy)) { + if (postcopy_start(s)) { + error_report("%s: postcopy failed to start", __func__); +diff --git a/migration/migration.h b/migration/migration.h +index 2b71df8617..e9679f8029 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -204,6 +204,13 @@ struct MigrationIncomingState { + * contains valid information. + */ + QemuMutex page_request_mutex; ++ ++ /* ++ * Number of devices that have yet to approve switchover. When this reaches ++ * zero an ACK that it's OK to do switchover is sent to the source. No lock ++ * is needed as this field is updated serially. 
++ */ ++ unsigned int switchover_ack_pending_num; + }; + + MigrationIncomingState *migration_incoming_get_current(void); +@@ -421,6 +428,12 @@ struct MigrationState { + + /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */ + JSONWriter *vmdesc; ++ ++ /* ++ * Indicates whether an ACK from the destination that it's OK to do ++ * switchover has been received. ++ */ ++ bool switchover_acked; + }; + + void migrate_set_state(int *state, int old_state, int new_state); +@@ -461,6 +474,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, + void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, + char *block_name); + void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); ++int migrate_send_rp_switchover_ack(MigrationIncomingState *mis); + + void dirty_bitmap_mig_before_vm_start(void); + void dirty_bitmap_mig_cancel_outgoing(void); +diff --git a/migration/savevm.c b/migration/savevm.c +index 211eff3a8b..aff70e6263 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2358,6 +2358,21 @@ static int loadvm_process_command(QEMUFile *f) + error_report("CMD_OPEN_RETURN_PATH failed"); + return -1; + } ++ ++ /* ++ * Switchover ack is enabled but no device uses it, so send an ACK to ++ * source that it's OK to switchover. Do it here, after return path has ++ * been created. 
++ */ ++ if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) { ++ int ret = migrate_send_rp_switchover_ack(mis); ++ if (ret) { ++ error_report( ++ "Could not send switchover ack RP MSG, err %d (%s)", ret, ++ strerror(-ret)); ++ return ret; ++ } ++ } + break; + + case MIG_CMD_PING: +@@ -2584,6 +2599,23 @@ static int qemu_loadvm_state_header(QEMUFile *f) + return 0; + } + ++static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis) ++{ ++ SaveStateEntry *se; ++ ++ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { ++ if (!se->ops || !se->ops->switchover_ack_needed) { ++ continue; ++ } ++ ++ if (se->ops->switchover_ack_needed(se->opaque)) { ++ mis->switchover_ack_pending_num++; ++ } ++ } ++ ++ trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num); ++} ++ + static int qemu_loadvm_state_setup(QEMUFile *f) + { + SaveStateEntry *se; +@@ -2787,6 +2819,10 @@ int qemu_loadvm_state(QEMUFile *f) + return -EINVAL; + } + ++ if (migrate_switchover_ack()) { ++ qemu_loadvm_state_switchover_ack_needed(mis); ++ } ++ + cpu_synchronize_all_pre_loadvm(); + + ret = qemu_loadvm_state_main(f, mis); +@@ -2860,6 +2896,24 @@ int qemu_load_device_state(QEMUFile *f) + return 0; + } + ++int qemu_loadvm_approve_switchover(void) ++{ ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++ if (!mis->switchover_ack_pending_num) { ++ return -EINVAL; ++ } ++ ++ mis->switchover_ack_pending_num--; ++ trace_loadvm_approve_switchover(mis->switchover_ack_pending_num); ++ ++ if (mis->switchover_ack_pending_num) { ++ return 0; ++ } ++ ++ return migrate_send_rp_switchover_ack(mis); ++} ++ + bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + bool has_devices, strList *devices, Error **errp) + { +diff --git a/migration/savevm.h b/migration/savevm.h +index fb636735f0..e894bbc143 100644 +--- a/migration/savevm.h ++++ b/migration/savevm.h +@@ -65,6 +65,7 @@ int qemu_loadvm_state(QEMUFile *f); + void 
qemu_loadvm_state_cleanup(void); + int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); + int qemu_load_device_state(QEMUFile *f); ++int qemu_loadvm_approve_switchover(void); + int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + bool in_postcopy, bool inactivate_disks); + +diff --git a/migration/trace-events b/migration/trace-events +index 92161eeac5..cda807d271 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -7,6 +7,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u" + qemu_loadvm_state_post_main(int ret) "%d" + qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" + qemu_savevm_send_packaged(void) "" ++loadvm_state_switchover_ack_needed(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" + loadvm_state_setup(void) "" + loadvm_state_cleanup(void) "" + loadvm_handle_cmd_packaged(unsigned int length) "%u" +@@ -23,6 +24,7 @@ loadvm_postcopy_ram_handle_discard_end(void) "" + loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud" + loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d" + loadvm_process_command_ping(uint32_t val) "0x%x" ++loadvm_approve_switchover(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" + postcopy_ram_listen_thread_exit(void) "" + postcopy_ram_listen_thread_start(void) "" + qemu_savevm_send_postcopy_advise(void) "" +@@ -180,6 +182,7 @@ source_return_path_thread_loop_top(void) "" + source_return_path_thread_pong(uint32_t val) "0x%x" + source_return_path_thread_shut(uint32_t val) "0x%x" + source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32 ++source_return_path_thread_switchover_acked(void) "" + migration_thread_low_pending(uint64_t pending) "%" PRIu64 + migrate_transferred(uint64_t tranferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" 
PRIu64 " max_size %" PRId64 + process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch b/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch new file mode 100644 index 0000000..f873f3f --- /dev/null +++ b/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch @@ -0,0 +1,431 @@ +From eaccfc91b34f93dcaf597e6b39f78741da618ff3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 08/37] migration: Make all functions check have the same + format +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/28] 774df2a81502d3eab5d5b8f64fa9b69f8be43669 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 8f9c532756c5 +Author: Juan Quintela +Date: Wed Mar 1 23:11:08 2023 +0100 + + migration: Make all functions check have the same format + + Signed-off-by: Juan Quintela + Reviewed-by: Vladimir Sementsov-Ogievskiy + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 153 +++++++++++--------------------------------- + 1 file changed, 39 insertions(+), 114 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index e51d667e14..bcfe244fa9 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,27 +33,21 @@ + + bool migrate_auto_converge(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; + } + + bool migrate_background_snapshot(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return 
s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + + bool migrate_block(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; + } +@@ -61,95 +55,76 @@ bool migrate_block(void) + bool migrate_colo(void) + { + MigrationState *s = migrate_get_current(); ++ + return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + + bool migrate_compress(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; + } + + bool migrate_dirty_bitmaps(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + + bool migrate_events(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; + } + + bool migrate_ignore_shared(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; + } + + bool migrate_late_block_activate(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + + bool migrate_multifd(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + + bool migrate_pause_before_switchover(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; + } + + bool migrate_postcopy_blocktime(void) + { +- MigrationState *s; +- +- s = 
migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; + } + + bool migrate_postcopy_preempt(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; + } + + bool migrate_postcopy_ram(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } +@@ -163,54 +138,42 @@ bool migrate_rdma_pin_all(void) + + bool migrate_release_ram(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + + bool migrate_return_path(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + + bool migrate_validate_uuid(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + + bool migrate_xbzrle(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; + } + + bool migrate_zero_blocks(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } + + bool migrate_zero_copy_send(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } +@@ -224,9 +187,7 @@ bool migrate_postcopy(void) + + bool migrate_tls(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ 
MigrationState *s = migrate_get_current(); + + return s->parameters.tls_creds && *s->parameters.tls_creds; + } +@@ -491,126 +452,98 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + + bool migrate_block_incremental(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.block_incremental; + } + + uint32_t migrate_checkpoint_delay(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.x_checkpoint_delay; + } + + int migrate_compress_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_level; + } + + int migrate_compress_threads(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_threads; + } + + int migrate_compress_wait_thread(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_wait_thread; + } + + uint8_t migrate_cpu_throttle_increment(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_increment; + } + + uint8_t migrate_cpu_throttle_initial(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_initial; + } + + bool migrate_cpu_throttle_tailslow(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_tailslow; + } + + int migrate_decompress_threads(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.decompress_threads; + } + + uint8_t 
migrate_max_cpu_throttle(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_cpu_throttle; + } + + uint64_t migrate_max_bandwidth(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_bandwidth; + } + + int64_t migrate_max_postcopy_bandwidth(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_postcopy_bandwidth; + } + + int migrate_multifd_channels(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.multifd_channels; + } + + MultiFDCompression migrate_multifd_compression(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); + return s->parameters.multifd_compression; +@@ -618,36 +551,28 @@ MultiFDCompression migrate_multifd_compression(void) + + int migrate_multifd_zlib_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.multifd_zlib_level; + } + + int migrate_multifd_zstd_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.multifd_zstd_level; + } + + uint8_t migrate_throttle_trigger_threshold(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.throttle_trigger_threshold; + } + + uint64_t migrate_xbzrle_cache_size(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.xbzrle_cache_size; + } +-- +2.39.3 + diff --git 
a/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch new file mode 100644 index 0000000..ad1de7b --- /dev/null +++ b/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch @@ -0,0 +1,105 @@ +From 886b511e0a225b1c4428c646534d7bcc65bd9e2a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 18:02:34 +0200 +Subject: [PATCH 14/56] migration: Make dirty_sync_count atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [13/50] ef3ae8cdd960e944ba9e73a53d54c9a5a55bb1ce (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 536b5a4e56ec67c958f46e7d46cbd5ac34e5a239) +Signed-off-by: Peter Xu +--- + migration/migration.c | 3 ++- + migration/ram.c | 13 +++++++------ + migration/ram.h | 2 +- + 3 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8f2847d298..8fca751050 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1148,7 +1148,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->normal = stat64_get(&ram_counters.normal); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; +- info->ram->dirty_sync_count = ram_counters.dirty_sync_count; ++ info->ram->dirty_sync_count = ++ stat64_get(&ram_counters.dirty_sync_count); + info->ram->dirty_sync_missed_zero_copy = + stat64_get(&ram_counters.dirty_sync_missed_zero_copy); + info->ram->postcopy_requests = ram_counters.postcopy_requests; +diff --git a/migration/ram.c b/migration/ram.c +index b1722b6071..3c13136559 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -764,7 +764,7 
@@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) + /* We don't care if this fails to allocate a new cache page + * as long as it updated an old one */ + cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, +- ram_counters.dirty_sync_count); ++ stat64_get(&ram_counters.dirty_sync_count)); + } + + #define ENCODING_FLAG_XBZRLE 0x1 +@@ -790,13 +790,13 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, + int encoded_len = 0, bytes_xbzrle; + uint8_t *prev_cached_page; + QEMUFile *file = pss->pss_channel; ++ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); + +- if (!cache_is_cached(XBZRLE.cache, current_addr, +- ram_counters.dirty_sync_count)) { ++ if (!cache_is_cached(XBZRLE.cache, current_addr, generation)) { + xbzrle_counters.cache_miss++; + if (!rs->last_stage) { + if (cache_insert(XBZRLE.cache, current_addr, *current_data, +- ram_counters.dirty_sync_count) == -1) { ++ generation) == -1) { + return -1; + } else { + /* update *current_data when the page has been +@@ -1209,7 +1209,7 @@ static void migration_bitmap_sync(RAMState *rs) + RAMBlock *block; + int64_t end_time; + +- ram_counters.dirty_sync_count++; ++ stat64_add(&ram_counters.dirty_sync_count, 1); + + if (!rs->time_last_bitmap_sync) { + rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +@@ -1246,7 +1246,8 @@ static void migration_bitmap_sync(RAMState *rs) + rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + if (migrate_use_events()) { +- qapi_event_send_migration_pass(ram_counters.dirty_sync_count); ++ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); ++ qapi_event_send_migration_pass(generation); + } + } + +diff --git a/migration/ram.h b/migration/ram.h +index bb52632424..8c0d07c43a 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -42,7 +42,7 @@ + */ + typedef struct { + int64_t dirty_pages_rate; +- int64_t dirty_sync_count; ++ Stat64 dirty_sync_count; + Stat64 
dirty_sync_missed_zero_copy; + Stat64 downtime_bytes; + Stat64 duplicate; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch new file mode 100644 index 0000000..b7b0f60 --- /dev/null +++ b/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch @@ -0,0 +1,92 @@ +From e9ff20d7f7e6c2354f3696e8bca265e535eeb801 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:33:56 +0200 +Subject: [PATCH 11/56] migration: Make dirty_sync_missed_zero_copy atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [10/50] 041230abb087db0e7ffae02b4f85772490b805a0 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 4291823694fd8507831d26e2558d9cd0030841f7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/multifd.c | 2 +- + migration/ram.c | 5 ----- + migration/ram.h | 4 +--- + 4 files changed, 3 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index ca68808b5c..645fb4b3c5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1150,7 +1150,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; + info->ram->dirty_sync_missed_zero_copy = +- ram_counters.dirty_sync_missed_zero_copy; ++ stat64_get(&ram_counters.dirty_sync_missed_zero_copy); + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); +diff --git 
a/migration/multifd.c b/migration/multifd.c +index 1c992abf53..903df2117b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -576,7 +576,7 @@ static int multifd_zero_copy_flush(QIOChannel *c) + return -1; + } + if (ret == 1) { +- dirty_sync_missed_zero_copy(); ++ stat64_add(&ram_counters.dirty_sync_missed_zero_copy, 1); + } + + return ret; +diff --git a/migration/ram.c b/migration/ram.c +index 71320ed27a..93e0a48af4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -472,11 +472,6 @@ void ram_transferred_add(uint64_t bytes) + stat64_add(&ram_counters.transferred, bytes); + } + +-void dirty_sync_missed_zero_copy(void) +-{ +- ram_counters.dirty_sync_missed_zero_copy++; +-} +- + struct MigrationOps { + int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss); + }; +diff --git a/migration/ram.h b/migration/ram.h +index ed70391317..2170c55e67 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -43,7 +43,7 @@ + typedef struct { + int64_t dirty_pages_rate; + int64_t dirty_sync_count; +- uint64_t dirty_sync_missed_zero_copy; ++ Stat64 dirty_sync_missed_zero_copy; + uint64_t downtime_bytes; + Stat64 duplicate; + Stat64 multifd_bytes; +@@ -114,6 +114,4 @@ void ram_write_tracking_prepare(void); + int ram_write_tracking_start(void); + void ram_write_tracking_stop(void); + +-void dirty_sync_missed_zero_copy(void); +- + #endif +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch b/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch new file mode 100644 index 0000000..9b206bc --- /dev/null +++ b/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch @@ -0,0 +1,68 @@ +From 4c6af064277b5445b31db4a598e1c4402ba56452 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:38:11 +0200 +Subject: [PATCH 13/56] migration: Make downtime_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty 
failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [12/50] ebfc16aae8bc4a8c1fec431780a062950e6f50c4 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 296a4ac2aa63038b6b702f2ee8f0f93ae26727ae) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 3a68d93d69..8f2847d298 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1156,7 +1156,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); +- info->ram->downtime_bytes = ram_counters.downtime_bytes; ++ info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + + if (migrate_use_xbzrle()) { +diff --git a/migration/ram.c b/migration/ram.c +index 0b4693215e..b1722b6071 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -467,7 +467,7 @@ void ram_transferred_add(uint64_t bytes) + } else if (migration_in_postcopy()) { + stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { +- ram_counters.downtime_bytes += bytes; ++ stat64_add(&ram_counters.downtime_bytes, bytes); + } + stat64_add(&ram_counters.transferred, bytes); + } +diff --git a/migration/ram.h b/migration/ram.h +index a766b895fa..bb52632424 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -44,7 +44,7 @@ typedef struct { + int64_t dirty_pages_rate; + int64_t dirty_sync_count; + Stat64 dirty_sync_missed_zero_copy; +- uint64_t downtime_bytes; ++ Stat64 downtime_bytes; + Stat64 duplicate; + Stat64 
multifd_bytes; + Stat64 normal; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch b/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch new file mode 100644 index 0000000..b315fdc --- /dev/null +++ b/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch @@ -0,0 +1,99 @@ +From bfcc4bc8f60b541d545f1ea27b1ff156d8092d33 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 23 Nov 2022 20:36:56 +0100 +Subject: [PATCH 10/56] migration: Make multifd_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [9/50] c2bc6b173770a0ea81c3f9d850c583c651647070 (peterx/qemu-kvm) + +In the spirit of: + +commit 394d323bc3451e4d07f13341cb8817fac8dfbadd +Author: Peter Xu +Date: Tue Oct 11 17:55:51 2022 -0400 + + migration: Use atomic ops properly for page accountings + +Reviewed-by: David Edmondson +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit cf671116facf4e29d91fce9c9ffb535385ffac81) +Signed-off-by: Peter Xu +--- + migration/migration.c | 4 ++-- + migration/multifd.c | 4 ++-- + migration/ram.h | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index a91704d35c..ca68808b5c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1153,7 +1153,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + ram_counters.dirty_sync_missed_zero_copy; + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; +- info->ram->multifd_bytes = ram_counters.multifd_bytes; ++ info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; + 
info->ram->precopy_bytes = ram_counters.precopy_bytes; + info->ram->downtime_bytes = ram_counters.downtime_bytes; +@@ -3780,7 +3780,7 @@ static MigThrError migration_detect_error(MigrationState *s) + static uint64_t migration_total_bytes(MigrationState *s) + { + return qemu_file_total_transferred(s->to_dst_file) + +- ram_counters.multifd_bytes; ++ stat64_get(&ram_counters.multifd_bytes); + } + + static void migration_calculate_complete(MigrationState *s) +diff --git a/migration/multifd.c b/migration/multifd.c +index 6ef3a27938..1c992abf53 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -432,9 +432,9 @@ static int multifd_send_pages(QEMUFile *f) + p->pages = pages; + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); +- ram_counters.multifd_bytes += transferred; + qemu_mutex_unlock(&p->mutex); + stat64_add(&ram_counters.transferred, transferred); ++ stat64_add(&ram_counters.multifd_bytes, transferred); + qemu_sem_post(&p->sem); + + return 1; +@@ -627,9 +627,9 @@ int multifd_send_sync_main(QEMUFile *f) + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); +- ram_counters.multifd_bytes += p->packet_len; + qemu_mutex_unlock(&p->mutex); + stat64_add(&ram_counters.transferred, p->packet_len); ++ stat64_add(&ram_counters.multifd_bytes, p->packet_len); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +diff --git a/migration/ram.h b/migration/ram.h +index 7c026b5242..ed70391317 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -46,7 +46,7 @@ typedef struct { + uint64_t dirty_sync_missed_zero_copy; + uint64_t downtime_bytes; + Stat64 duplicate; +- uint64_t multifd_bytes; ++ Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; + int64_t postcopy_requests; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch b/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch 
new file mode 100644 index 0000000..894419a --- /dev/null +++ b/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch @@ -0,0 +1,69 @@ +From e6ff4536a5e5f5bbfda370ecb525d0e066c3ab1c Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 18:04:59 +0200 +Subject: [PATCH 15/56] migration: Make postcopy_requests atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [14/50] d15c6052b77e7ded7bf34c66caa11bf86b75f2e8 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 3c764f9b2bc3e5eb5ed93ab45c2de6d599fef00f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 3 ++- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8fca751050..39501a0ed8 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1152,7 +1152,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + stat64_get(&ram_counters.dirty_sync_count); + info->ram->dirty_sync_missed_zero_copy = + stat64_get(&ram_counters.dirty_sync_missed_zero_copy); +- info->ram->postcopy_requests = ram_counters.postcopy_requests; ++ info->ram->postcopy_requests = ++ stat64_get(&ram_counters.postcopy_requests); + info->ram->page_size = page_size; + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; +diff --git a/migration/ram.c b/migration/ram.c +index 3c13136559..fe69ecaef4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2169,7 +2169,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) + RAMBlock *ramblock; + RAMState *rs = 
ram_state; + +- ram_counters.postcopy_requests++; ++ stat64_add(&ram_counters.postcopy_requests, 1); + RCU_READ_LOCK_GUARD(); + + if (!rbname) { +diff --git a/migration/ram.h b/migration/ram.h +index 8c0d07c43a..afa68521d7 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -49,7 +49,7 @@ typedef struct { + Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +- int64_t postcopy_requests; ++ Stat64 postcopy_requests; + Stat64 precopy_bytes; + int64_t remaining; + Stat64 transferred; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch b/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch new file mode 100644 index 0000000..8e6c177 --- /dev/null +++ b/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch @@ -0,0 +1,68 @@ +From 7e4d4316855f7f6556364eb16828f925b61c80d4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:36:48 +0200 +Subject: [PATCH 12/56] migration: Make precopy_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [11/50] 23bec49b4b8f4d23c2192b401416139e3ca13626 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit b013b5d1f32ef88457e66c7ce576f6475238f97f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 645fb4b3c5..3a68d93d69 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1155,7 +1155,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->page_size = page_size; + info->ram->multifd_bytes = 
stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; +- info->ram->precopy_bytes = ram_counters.precopy_bytes; ++ info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); + info->ram->downtime_bytes = ram_counters.downtime_bytes; + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + +diff --git a/migration/ram.c b/migration/ram.c +index 93e0a48af4..0b4693215e 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -463,7 +463,7 @@ RAMStats ram_counters; + void ram_transferred_add(uint64_t bytes) + { + if (runstate_is_running()) { +- ram_counters.precopy_bytes += bytes; ++ stat64_add(&ram_counters.precopy_bytes, bytes); + } else if (migration_in_postcopy()) { + stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { +diff --git a/migration/ram.h b/migration/ram.h +index 2170c55e67..a766b895fa 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -50,7 +50,7 @@ typedef struct { + Stat64 normal; + Stat64 postcopy_bytes; + int64_t postcopy_requests; +- uint64_t precopy_bytes; ++ Stat64 precopy_bytes; + int64_t remaining; + Stat64 transferred; + } RAMStats; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch b/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch new file mode 100644 index 0000000..0679e89 --- /dev/null +++ b/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch @@ -0,0 +1,270 @@ +From 5a87058eea6ee56f37fb454486c35baaf693d691 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 22 Feb 2023 15:56:45 +0100 +Subject: [PATCH 08/56] migration: Merge ram_counters and ram_atomic_counters +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 
+RH-Commit: [7/50] 90e395de66aa32b886cf151f7996a680190471f5 (peterx/qemu-kvm) + +Using MgrationStats as type for ram_counters mean that we didn't have +to re-declare each value in another struct. The need of atomic +counters have make us to create MigrationAtomicStats for this atomic +counters. + +Create RAMStats type which is a merge of MigrationStats and +MigrationAtomicStats removing unused members. + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu + +--- + +Fix typos found by David Edmondson + +(cherry picked from commit abce5fa16d126ed085ccf8a5b3fe61a1efa20994) +Signed-off-by: Peter Xu +--- + migration/migration.c | 8 ++++---- + migration/multifd.c | 4 ++-- + migration/ram.c | 39 ++++++++++++++++----------------------- + migration/ram.h | 28 +++++++++++++++------------- + 4 files changed, 37 insertions(+), 42 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 99f86bd6c2..a91704d35c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1140,12 +1140,12 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + size_t page_size = qemu_target_page_size(); + + info->ram = g_malloc0(sizeof(*info->ram)); +- info->ram->transferred = stat64_get(&ram_atomic_counters.transferred); ++ info->ram->transferred = stat64_get(&ram_counters.transferred); + info->ram->total = ram_bytes_total(); +- info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate); ++ info->ram->duplicate = stat64_get(&ram_counters.duplicate); + /* legacy value. 
It is not used anymore */ + info->ram->skipped = 0; +- info->ram->normal = stat64_get(&ram_atomic_counters.normal); ++ info->ram->normal = stat64_get(&ram_counters.normal); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; +@@ -1157,7 +1157,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = ram_counters.precopy_bytes; + info->ram->downtime_bytes = ram_counters.downtime_bytes; +- info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes); ++ info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + + if (migrate_use_xbzrle()) { + info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); +diff --git a/migration/multifd.c b/migration/multifd.c +index cbc0dfe39b..01fab01a92 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -433,7 +433,7 @@ static int multifd_send_pages(QEMUFile *f) + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); + ram_counters.multifd_bytes += transferred; +- stat64_add(&ram_atomic_counters.transferred, transferred); ++ stat64_add(&ram_counters.transferred, transferred); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + +@@ -628,7 +628,7 @@ int multifd_send_sync_main(QEMUFile *f) + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); + ram_counters.multifd_bytes += p->packet_len; +- stat64_add(&ram_atomic_counters.transferred, p->packet_len); ++ stat64_add(&ram_counters.transferred, p->packet_len); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +diff --git a/migration/ram.c b/migration/ram.c +index 0e68099bf9..71320ed27a 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -458,25 +458,18 @@ uint64_t ram_bytes_remaining(void) + 0; + } + +-/* +- * NOTE: not all stats in ram_counters are used in 
reality. See comments +- * for struct MigrationAtomicStats. The ultimate result of ram migration +- * counters will be a merged version with both ram_counters and the atomic +- * fields in ram_atomic_counters. +- */ +-MigrationStats ram_counters; +-MigrationAtomicStats ram_atomic_counters; ++RAMStats ram_counters; + + void ram_transferred_add(uint64_t bytes) + { + if (runstate_is_running()) { + ram_counters.precopy_bytes += bytes; + } else if (migration_in_postcopy()) { +- stat64_add(&ram_atomic_counters.postcopy_bytes, bytes); ++ stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { + ram_counters.downtime_bytes += bytes; + } +- stat64_add(&ram_atomic_counters.transferred, bytes); ++ stat64_add(&ram_counters.transferred, bytes); + } + + void dirty_sync_missed_zero_copy(void) +@@ -756,7 +749,7 @@ void mig_throttle_counter_reset(void) + + rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + rs->num_dirty_pages_period = 0; +- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); ++ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + + /** +@@ -1130,8 +1123,8 @@ uint64_t ram_pagesize_summary(void) + + uint64_t ram_get_total_transferred_pages(void) + { +- return stat64_get(&ram_atomic_counters.normal) + +- stat64_get(&ram_atomic_counters.duplicate) + ++ return stat64_get(&ram_counters.normal) + ++ stat64_get(&ram_counters.duplicate) + + compression_counters.pages + xbzrle_counters.pages; + } + +@@ -1192,7 +1185,7 @@ static void migration_trigger_throttle(RAMState *rs) + MigrationState *s = migrate_get_current(); + uint64_t threshold = s->parameters.throttle_trigger_threshold; + uint64_t bytes_xfer_period = +- stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev; ++ stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; + uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; + uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; + +@@ -1255,7 +1248,7 @@ static 
void migration_bitmap_sync(RAMState *rs) + /* reset period counters */ + rs->time_last_bitmap_sync = end_time; + rs->num_dirty_pages_period = 0; +- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); ++ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + if (migrate_use_events()) { + qapi_event_send_migration_pass(ram_counters.dirty_sync_count); +@@ -1331,7 +1324,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, + int len = save_zero_page_to_file(pss, f, block, offset); + + if (len) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + ram_transferred_add(len); + return 1; + } +@@ -1368,9 +1361,9 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + } + + if (bytes_xmit > 0) { +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + } else if (bytes_xmit == 0) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + } + + return true; +@@ -1402,7 +1395,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + } + ram_transferred_add(TARGET_PAGE_SIZE); +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + return 1; + } + +@@ -1458,7 +1451,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, + if (multifd_queue_page(file, block, offset) < 0) { + return -1; + } +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + + return 1; + } +@@ -1497,7 +1490,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + ram_transferred_add(bytes_xmit); + + if (param->zero_page) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + return; + } + +@@ -2632,9 +2625,9 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + uint64_t pages = size / TARGET_PAGE_SIZE; 
+ + if (zero) { +- stat64_add(&ram_atomic_counters.duplicate, pages); ++ stat64_add(&ram_counters.duplicate, pages); + } else { +- stat64_add(&ram_atomic_counters.normal, pages); ++ stat64_add(&ram_counters.normal, pages); + ram_transferred_add(size); + qemu_file_credit_transfer(f, size); + } +diff --git a/migration/ram.h b/migration/ram.h +index 81cbb0947c..7c026b5242 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -35,25 +35,27 @@ + #include "qemu/stats64.h" + + /* +- * These are the migration statistic counters that need to be updated using +- * atomic ops (can be accessed by more than one thread). Here since we +- * cannot modify MigrationStats directly to use Stat64 as it was defined in +- * the QAPI scheme, we define an internal structure to hold them, and we +- * propagate the real values when QMP queries happen. +- * +- * IOW, the corresponding fields within ram_counters on these specific +- * fields will be always zero and not being used at all; they're just +- * placeholders to make it QAPI-compatible. ++ * These are the ram migration statistic counters. It is loosely ++ * based on MigrationStats. We change to Stat64 any counter that ++ * needs to be updated using atomic ops (can be accessed by more than ++ * one thread). 
+ */ + typedef struct { +- Stat64 transferred; ++ int64_t dirty_pages_rate; ++ int64_t dirty_sync_count; ++ uint64_t dirty_sync_missed_zero_copy; ++ uint64_t downtime_bytes; + Stat64 duplicate; ++ uint64_t multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +-} MigrationAtomicStats; ++ int64_t postcopy_requests; ++ uint64_t precopy_bytes; ++ int64_t remaining; ++ Stat64 transferred; ++} RAMStats; + +-extern MigrationAtomicStats ram_atomic_counters; +-extern MigrationStats ram_counters; ++extern RAMStats ram_counters; + extern XBZRLECacheStats xbzrle_counters; + extern CompressionStats compression_counters; + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Minor-control-flow-simplification.patch b/SOURCES/kvm-migration-Minor-control-flow-simplification.patch new file mode 100644 index 0000000..a0dbdd9 --- /dev/null +++ b/SOURCES/kvm-migration-Minor-control-flow-simplification.patch @@ -0,0 +1,52 @@ +From c3bc974ea4b5186a76daa433209c1209d94dd0b7 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 20 Apr 2023 09:35:51 -0500 +Subject: [PATCH 2/2] migration: Minor control flow simplification + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [2/2] 5afd8c25d6f14bdb2a380ecc77bc6c2f2a26df87 (ebblake/centos-qemu-kvm) + +No need to declare a temporary variable. 
+ +Suggested-by: Juan Quintela +Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better") +Signed-off-by: Eric Blake +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 5d39f44d7ac5c63f53d4d0900ceba9521bc27e49) +--- + migration/migration.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index cb0d42c061..08007cef4e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3436,7 +3436,6 @@ static void migration_completion(MigrationState *s) + ret = global_state_store(); + + if (!ret) { +- bool inactivate = !migrate_colo_enabled(); + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + trace_migration_completion_vm_stop(ret); + if (ret >= 0) { +@@ -3444,10 +3443,10 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { +- s->block_inactive = inactivate; ++ s->block_inactive = !migrate_colo_enabled(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, +- inactivate); ++ s->block_inactive); + } + } + qemu_mutex_unlock_iothread(); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch b/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch new file mode 100644 index 0000000..24dcb16 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch @@ -0,0 +1,90 @@ +From 1f5232d611ecaaf61bcac151e7d90b8b452ac161 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:17:23 +0100 +Subject: [PATCH 43/56] migration: Move migrate_announce_params() to option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás 
+RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [42/50] 541be7adc7f81c269058485aef5b14e787b2efe6 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas + +--- + +Fix extra whitespace (fabiano) + +(cherry picked from commit 2682c4eea72c621dfd0fb0151cbd758e81d1bdff) +Signed-off-by: Peter Xu +--- + migration/migration.c | 14 -------------- + migration/options.c | 17 +++++++++++++++++ + 2 files changed, 17 insertions(+), 14 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 724e841eb9..f27ce30be2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -954,20 +954,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + return params; + } + +-AnnounceParameters *migrate_announce_params(void) +-{ +- static AnnounceParameters ap; +- +- MigrationState *s = migrate_get_current(); +- +- ap.initial = s->parameters.announce_initial; +- ap.max = s->parameters.announce_max; +- ap.rounds = s->parameters.announce_rounds; +- ap.step = s->parameters.announce_step; +- +- return &ap; +-} +- + /* + * Return true if we're already in the middle of a migration + * (i.e. 
any of the active or setup states) +diff --git a/migration/options.c b/migration/options.c +index 2cb04fbbd1..418aafac64 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -16,6 +16,7 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" ++#include "migration/misc.h" + #include "migration.h" + #include "ram.h" + #include "options.h" +@@ -589,3 +590,19 @@ uint64_t migrate_xbzrle_cache_size(void) + + return s->parameters.xbzrle_cache_size; + } ++ ++/* parameters helpers */ ++ ++AnnounceParameters *migrate_announce_params(void) ++{ ++ static AnnounceParameters ap; ++ ++ MigrationState *s = migrate_get_current(); ++ ++ ap.initial = s->parameters.announce_initial; ++ ap.max = s->parameters.announce_max; ++ ap.rounds = s->parameters.announce_rounds; ++ ap.step = s->parameters.announce_step; ++ ++ return &ap; ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch new file mode 100644 index 0000000..0e33c4c --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch @@ -0,0 +1,110 @@ +From 9c4f8d869f5bbdd07381f6baad2ed755b07d03f4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:25:44 +0100 +Subject: [PATCH 36/56] migration: Move migrate_cap_set() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [35/50] d0cd6b8e9cf0534a56795d94c3da18622fa10ad7 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit f80196b772ddeeb07d3d80d5c8382cb5d1063fa2) +Signed-off-by: Peter Xu +--- + migration/migration.c | 20 -------------------- 
+ migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + 3 files changed, 22 insertions(+), 20 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 369cd91796..880a51210e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1666,26 +1666,6 @@ void migrate_set_state(int *state, int old_state, int new_state) + } + } + +-static bool migrate_cap_set(int cap, bool value, Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- bool new_caps[MIGRATION_CAPABILITY__MAX]; +- +- if (migration_is_running(s->state)) { +- error_setg(errp, QERR_MIGRATION_ACTIVE); +- return false; +- } +- +- memcpy(new_caps, s->capabilities, sizeof(new_caps)); +- new_caps[cap] = value; +- +- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { +- return false; +- } +- s->capabilities[cap] = value; +- return true; +-} +- + static void migrate_set_block_incremental(MigrationState *s, bool value) + { + s->parameters.block_incremental = value; +diff --git a/migration/options.c b/migration/options.c +index 4cbe77e35a..f3b2d6e482 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" ++#include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" + #include "migration.h" + #include "ram.h" +@@ -392,6 +393,26 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + return true; + } + ++bool migrate_cap_set(int cap, bool value, Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; ++ ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return false; ++ } ++ ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ new_caps[cap] = value; ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return false; ++ } ++ s->capabilities[cap] = value; ++ return true; ++} ++ + 
MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + { + MigrationCapabilityStatusList *head = NULL, **tail = &head; +diff --git a/migration/options.h b/migration/options.h +index e779f14161..5979e4ff90 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -41,5 +41,6 @@ bool migrate_zero_copy_send(void); + /* capabilities helpers */ + + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); ++bool migrate_cap_set(int cap, bool value, Error **errp); + + #endif +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch new file mode 100644 index 0000000..0d6fa08 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch @@ -0,0 +1,458 @@ +From 3af7c7aaf7407ec14c19e54d52a2229ce4dbb7c5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:05:53 +0100 +Subject: [PATCH 33/56] migration: Move migrate_caps_check() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [32/50] 12999471063d97fffb2b04c6dcb80083b902f963 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 77608706459bd197e25ac1ef54591b9f8a0b46f8) +Signed-off-by: Peter Xu +--- + migration/migration.c | 190 ----------------------------------------- + migration/options.c | 192 ++++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 4 + + 3 files changed, 196 insertions(+), 190 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f7facecd66..d9e30ca918 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -136,39 +136,6 @@ 
enum mig_rp_message_type { + MIG_RP_MSG_MAX + }; + +-/* Migration capabilities set */ +-struct MigrateCapsSet { +- int size; /* Capability set size */ +- MigrationCapability caps[]; /* Variadic array of capabilities */ +-}; +-typedef struct MigrateCapsSet MigrateCapsSet; +- +-/* Define and initialize MigrateCapsSet */ +-#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \ +- MigrateCapsSet _name = { \ +- .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ +- .caps = { __VA_ARGS__ } \ +- } +- +-/* Background-snapshot compatibility check list */ +-static const +-INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, +- MIGRATION_CAPABILITY_POSTCOPY_RAM, +- MIGRATION_CAPABILITY_DIRTY_BITMAPS, +- MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, +- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, +- MIGRATION_CAPABILITY_RETURN_PATH, +- MIGRATION_CAPABILITY_MULTIFD, +- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, +- MIGRATION_CAPABILITY_AUTO_CONVERGE, +- MIGRATION_CAPABILITY_RELEASE_RAM, +- MIGRATION_CAPABILITY_RDMA_PIN_ALL, +- MIGRATION_CAPABILITY_COMPRESS, +- MIGRATION_CAPABILITY_XBZRLE, +- MIGRATION_CAPABILITY_X_COLO, +- MIGRATION_CAPABILITY_VALIDATE_UUID, +- MIGRATION_CAPABILITY_ZERO_COPY_SEND); +- + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +@@ -1235,163 +1202,6 @@ static void fill_source_migration_info(MigrationInfo *info) + info->status = state; + } + +-typedef enum WriteTrackingSupport { +- WT_SUPPORT_UNKNOWN = 0, +- WT_SUPPORT_ABSENT, +- WT_SUPPORT_AVAILABLE, +- WT_SUPPORT_COMPATIBLE +-} WriteTrackingSupport; +- +-static +-WriteTrackingSupport migrate_query_write_tracking(void) +-{ +- /* Check if kernel supports required UFFD features */ +- if (!ram_write_tracking_available()) { +- return WT_SUPPORT_ABSENT; +- } +- /* +- * Check if current memory configuration is +- * compatible with required UFFD features. 
+- */ +- if (!ram_write_tracking_compatible()) { +- return WT_SUPPORT_AVAILABLE; +- } +- +- return WT_SUPPORT_COMPATIBLE; +-} +- +-/** +- * @migration_caps_check - check capability compatibility +- * +- * @old_caps: old capability list +- * @new_caps: new capability list +- * @errp: set *errp if the check failed, with reason +- * +- * Returns true if check passed, otherwise false. +- */ +-static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) +-{ +- MigrationIncomingState *mis = migration_incoming_get_current(); +- +-#ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { +- error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " +- "block migration"); +- error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); +- return false; +- } +-#endif +- +-#ifndef CONFIG_REPLICATION +- if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { +- error_setg(errp, "QEMU compiled without replication module" +- " can't enable COLO"); +- error_append_hint(errp, "Please enable replication before COLO.\n"); +- return false; +- } +-#endif +- +- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { +- /* This check is reasonably expensive, so only when it's being +- * set the first time, also it's only the destination that needs +- * special support. 
+- */ +- if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && +- runstate_check(RUN_STATE_INMIGRATE) && +- !postcopy_ram_supported_by_host(mis)) { +- /* postcopy_ram_supported_by_host will have emitted a more +- * detailed message +- */ +- error_setg(errp, "Postcopy is not supported"); +- return false; +- } +- +- if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { +- error_setg(errp, "Postcopy is not compatible with ignore-shared"); +- return false; +- } +- } +- +- if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { +- WriteTrackingSupport wt_support; +- int idx; +- /* +- * Check if 'background-snapshot' capability is supported by +- * host kernel and compatible with guest memory configuration. +- */ +- wt_support = migrate_query_write_tracking(); +- if (wt_support < WT_SUPPORT_AVAILABLE) { +- error_setg(errp, "Background-snapshot is not supported by host kernel"); +- return false; +- } +- if (wt_support < WT_SUPPORT_COMPATIBLE) { +- error_setg(errp, "Background-snapshot is not compatible " +- "with guest memory configuration"); +- return false; +- } +- +- /* +- * Check if there are any migration capabilities +- * incompatible with 'background-snapshot'. 
+- */ +- for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { +- int incomp_cap = check_caps_background_snapshot.caps[idx]; +- if (new_caps[incomp_cap]) { +- error_setg(errp, +- "Background-snapshot is not compatible with %s", +- MigrationCapability_str(incomp_cap)); +- return false; +- } +- } +- } +- +-#ifdef CONFIG_LINUX +- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && +- (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || +- new_caps[MIGRATION_CAPABILITY_COMPRESS] || +- new_caps[MIGRATION_CAPABILITY_XBZRLE] || +- migrate_multifd_compression() || +- migrate_use_tls())) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#else +- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { +- error_setg(errp, +- "Zero copy currently only available on Linux"); +- return false; +- } +-#endif +- +- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { +- if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { +- error_setg(errp, "Postcopy preempt requires postcopy-ram"); +- return false; +- } +- +- /* +- * Preempt mode requires urgent pages to be sent in separate +- * channel, OTOH compression logic will disorder all pages into +- * different compression channels, which is not compatible with the +- * preempt assumptions on channel assignments. 
+- */ +- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { +- error_setg(errp, "Postcopy preempt not compatible with compress"); +- return false; +- } +- } +- +- if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { +- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { +- error_setg(errp, "Multifd is not compatible with compress"); +- return false; +- } +- } +- +- return true; +-} +- + static void fill_destination_migration_info(MigrationInfo *info) + { + MigrationIncomingState *mis = migration_incoming_get_current(); +diff --git a/migration/options.c b/migration/options.c +index 9c9b8e5863..367c930f46 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,7 +12,10 @@ + */ + + #include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "sysemu/runstate.h" + #include "migration.h" ++#include "ram.h" + #include "options.h" + + bool migrate_auto_converge(void) +@@ -198,3 +201,192 @@ bool migrate_zero_copy_send(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } ++typedef enum WriteTrackingSupport { ++ WT_SUPPORT_UNKNOWN = 0, ++ WT_SUPPORT_ABSENT, ++ WT_SUPPORT_AVAILABLE, ++ WT_SUPPORT_COMPATIBLE ++} WriteTrackingSupport; ++ ++static ++WriteTrackingSupport migrate_query_write_tracking(void) ++{ ++ /* Check if kernel supports required UFFD features */ ++ if (!ram_write_tracking_available()) { ++ return WT_SUPPORT_ABSENT; ++ } ++ /* ++ * Check if current memory configuration is ++ * compatible with required UFFD features. ++ */ ++ if (!ram_write_tracking_compatible()) { ++ return WT_SUPPORT_AVAILABLE; ++ } ++ ++ return WT_SUPPORT_COMPATIBLE; ++} ++ ++/* Migration capabilities set */ ++struct MigrateCapsSet { ++ int size; /* Capability set size */ ++ MigrationCapability caps[]; /* Variadic array of capabilities */ ++}; ++typedef struct MigrateCapsSet MigrateCapsSet; ++ ++/* Define and initialize MigrateCapsSet */ ++#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) 
\ ++ MigrateCapsSet _name = { \ ++ .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ ++ .caps = { __VA_ARGS__ } \ ++ } ++ ++/* Background-snapshot compatibility check list */ ++static const ++INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, ++ MIGRATION_CAPABILITY_POSTCOPY_RAM, ++ MIGRATION_CAPABILITY_DIRTY_BITMAPS, ++ MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, ++ MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, ++ MIGRATION_CAPABILITY_RETURN_PATH, ++ MIGRATION_CAPABILITY_MULTIFD, ++ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, ++ MIGRATION_CAPABILITY_AUTO_CONVERGE, ++ MIGRATION_CAPABILITY_RELEASE_RAM, ++ MIGRATION_CAPABILITY_RDMA_PIN_ALL, ++ MIGRATION_CAPABILITY_COMPRESS, ++ MIGRATION_CAPABILITY_XBZRLE, ++ MIGRATION_CAPABILITY_X_COLO, ++ MIGRATION_CAPABILITY_VALIDATE_UUID, ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND); ++ ++/** ++ * @migration_caps_check - check capability compatibility ++ * ++ * @old_caps: old capability list ++ * @new_caps: new capability list ++ * @errp: set *errp if the check failed, with reason ++ * ++ * Returns true if check passed, otherwise false. 
++ */ ++bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) ++{ ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++#ifndef CONFIG_LIVE_BLOCK_MIGRATION ++ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { ++ error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " ++ "block migration"); ++ error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); ++ return false; ++ } ++#endif ++ ++#ifndef CONFIG_REPLICATION ++ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { ++ error_setg(errp, "QEMU compiled without replication module" ++ " can't enable COLO"); ++ error_append_hint(errp, "Please enable replication before COLO.\n"); ++ return false; ++ } ++#endif ++ ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ /* This check is reasonably expensive, so only when it's being ++ * set the first time, also it's only the destination that needs ++ * special support. ++ */ ++ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && ++ runstate_check(RUN_STATE_INMIGRATE) && ++ !postcopy_ram_supported_by_host(mis)) { ++ /* postcopy_ram_supported_by_host will have emitted a more ++ * detailed message ++ */ ++ error_setg(errp, "Postcopy is not supported"); ++ return false; ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { ++ error_setg(errp, "Postcopy is not compatible with ignore-shared"); ++ return false; ++ } ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { ++ WriteTrackingSupport wt_support; ++ int idx; ++ /* ++ * Check if 'background-snapshot' capability is supported by ++ * host kernel and compatible with guest memory configuration. 
++ */ ++ wt_support = migrate_query_write_tracking(); ++ if (wt_support < WT_SUPPORT_AVAILABLE) { ++ error_setg(errp, "Background-snapshot is not supported by host kernel"); ++ return false; ++ } ++ if (wt_support < WT_SUPPORT_COMPATIBLE) { ++ error_setg(errp, "Background-snapshot is not compatible " ++ "with guest memory configuration"); ++ return false; ++ } ++ ++ /* ++ * Check if there are any migration capabilities ++ * incompatible with 'background-snapshot'. ++ */ ++ for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { ++ int incomp_cap = check_caps_background_snapshot.caps[idx]; ++ if (new_caps[incomp_cap]) { ++ error_setg(errp, ++ "Background-snapshot is not compatible with %s", ++ MigrationCapability_str(incomp_cap)); ++ return false; ++ } ++ } ++ } ++ ++#ifdef CONFIG_LINUX ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || ++ new_caps[MIGRATION_CAPABILITY_COMPRESS] || ++ new_caps[MIGRATION_CAPABILITY_XBZRLE] || ++ migrate_multifd_compression() || ++ migrate_use_tls())) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#else ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ error_setg(errp, ++ "Zero copy currently only available on Linux"); ++ return false; ++ } ++#endif ++ ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { ++ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ error_setg(errp, "Postcopy preempt requires postcopy-ram"); ++ return false; ++ } ++ ++ /* ++ * Preempt mode requires urgent pages to be sent in separate ++ * channel, OTOH compression logic will disorder all pages into ++ * different compression channels, which is not compatible with the ++ * preempt assumptions on channel assignments. 
++ */ ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { ++ error_setg(errp, "Postcopy preempt not compatible with compress"); ++ return false; ++ } ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { ++ error_setg(errp, "Multifd is not compatible with compress"); ++ return false; ++ } ++ } ++ ++ return true; ++} +diff --git a/migration/options.h b/migration/options.h +index 25c002b37a..e779f14161 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -38,4 +38,8 @@ bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + ++/* capabilities helpers */ ++ ++bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); ++ + #endif +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch new file mode 100644 index 0000000..47c6f83 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch @@ -0,0 +1,136 @@ +From 13da9060fa2dfc666cd6f4b9bc85b7cee0fef45e Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:00:16 +0100 +Subject: [PATCH 24/56] migration: Move migrate_colo_enabled() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [23/50] 4809b1091edee38bd222af41b6313133705785c7 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_colo() to be +consistent with all other capabilities. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5e8046445575dc5879e63c5d07af893d174813d0) +Signed-off-by: Peter Xu +--- + migration/migration.c | 16 +++++----------- + migration/migration.h | 1 - + migration/options.c | 6 ++++++ + migration/options.h | 1 + + 4 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 66ea55be06..59ee0ef82b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2411,7 +2411,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + } + + if (blk || blk_inc) { +- if (migrate_colo_enabled()) { ++ if (migrate_colo()) { + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +@@ -3304,7 +3304,7 @@ static void migration_completion(MigrationState *s) + * have done so in order to remember to reactivate + * them if migration fails or is cancelled. + */ +- s->block_inactive = !migrate_colo_enabled(); ++ s->block_inactive = !migrate_colo(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + s->block_inactive); +@@ -3357,7 +3357,7 @@ static void migration_completion(MigrationState *s) + goto fail; + } + +- if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { ++ if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { + /* COLO does not support postcopy */ + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COLO); +@@ -3435,12 +3435,6 @@ fail: + MIGRATION_STATUS_FAILED); + } + +-bool migrate_colo_enabled(void) +-{ +- MigrationState *s = migrate_get_current(); +- return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; +-} +- + typedef enum MigThrError { + /* No error detected */ + MIG_THR_ERR_NONE = 0, +@@ -3771,7 +3765,7 @@ static void migration_iteration_finish(MigrationState *s) + runstate_set(RUN_STATE_POSTMIGRATE); + break; + case MIGRATION_STATUS_COLO: 
+- if (!migrate_colo_enabled()) { ++ if (!migrate_colo()) { + error_report("%s: critical error: calling COLO code without " + "COLO enabled", __func__); + } +@@ -3967,7 +3961,7 @@ static void *migration_thread(void *opaque) + qemu_savevm_send_postcopy_advise(s->to_dst_file); + } + +- if (migrate_colo_enabled()) { ++ if (migrate_colo()) { + /* Notify migration destination that we enable COLO */ + qemu_savevm_send_colo_enable(s->to_dst_file); + } +diff --git a/migration/migration.h b/migration/migration.h +index a25fed6ef0..42f0c68b6f 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -463,7 +463,6 @@ bool migrate_use_zero_copy_send(void); + int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); +-bool migrate_colo_enabled(void); + + bool migrate_use_block(void); + bool migrate_use_block_incremental(void); +diff --git a/migration/options.c b/migration/options.c +index 88a9a45913..bd33c5da0a 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,6 +33,12 @@ bool migrate_background_snapshot(void) + return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + ++bool migrate_colo(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; ++} ++ + bool migrate_dirty_bitmaps(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 0dfa0af245..2a0ee61ff8 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -18,6 +18,7 @@ + + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); ++bool migrate_colo(void); + bool migrate_dirty_bitmaps(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch new file mode 100644 index 0000000..892ec9e --- /dev/null +++ 
b/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch @@ -0,0 +1,98 @@ +From 710fe195a3c13ffe96795a7a2b550c00319997ea Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:44:20 +0100 +Subject: [PATCH 47/56] migration: Move migrate_postcopy() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [46/50] a4f3455b3524a331f44b481bf7a79318aef5abaa (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit f774fde5d4e97cbfc64dab6622c2c53c5fe5c9fe) +Signed-off-by: Peter Xu +--- + migration/migration.c | 5 ----- + migration/migration.h | 2 -- + migration/options.c | 8 ++++++++ + migration/options.h | 9 +++++++++ + 4 files changed, 17 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f27ce30be2..46a5ea4d42 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2245,11 +2245,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-bool migrate_postcopy(void) +-{ +- return migrate_postcopy_ram() || migrate_dirty_bitmaps(); +-} +- + int migrate_use_tls(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 3ae938b19c..dcf906868d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); + bool migration_in_postcopy(void); + MigrationState *migrate_get_current(void); + +-bool migrate_postcopy(void); +- + int migrate_use_tls(void); + + uint64_t ram_get_total_transferred_pages(void); +diff --git a/migration/options.c b/migration/options.c +index 615534c151..8bd2d949ae 100644 +--- a/migration/options.c 
++++ b/migration/options.c +@@ -204,6 +204,14 @@ bool migrate_zero_copy_send(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } ++ ++/* pseudo capabilities */ ++ ++bool migrate_postcopy(void) ++{ ++ return migrate_postcopy_ram() || migrate_dirty_bitmaps(); ++} ++ + typedef enum WriteTrackingSupport { + WT_SUPPORT_UNKNOWN = 0, + WT_SUPPORT_ABSENT, +diff --git a/migration/options.h b/migration/options.h +index 99f6bbd7a1..093bc907a1 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -38,6 +38,15 @@ bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + ++/* ++ * pseudo capabilities ++ * ++ * These are functions that are used in a similar way to capabilities ++ * check, but they are not a capability. ++ */ ++ ++bool migrate_postcopy(void); ++ + /* capabilities helpers */ + + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch new file mode 100644 index 0000000..f7cb338 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch @@ -0,0 +1,134 @@ +From 276877a71778a5cef0dc5bc843e2679f0fdabb77 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:23:57 +0100 +Subject: [PATCH 30/56] migration: Move migrate_use_block() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [29/50] fcaeb0e07cf828f3cd0d115515b30d913525a0a2 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_block() +to be consistent with all other capabilities. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 9d4b1e5f22a838285ebeb8f0eb7cc8df1161998f) +Signed-off-by: Peter Xu +--- + migration/block.c | 2 +- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/savevm.c | 2 +- + 6 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index 4b167fa5cf..f0977217cf 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -1001,7 +1001,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) + + static bool block_is_active(void *opaque) + { +- return migrate_use_block(); ++ return migrate_block(); + } + + static SaveVMHandlers savevm_block_handlers = { +diff --git a/migration/migration.c b/migration/migration.c +index a4ede4294e..96f82bd165 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2415,7 +2415,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +- if (migrate_use_block() || migrate_use_block_incremental()) { ++ if (migrate_block() || migrate_use_block_incremental()) { + error_setg(errp, "Command options are incompatible with " + "current migration capabilities"); + return false; +@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) + return s->parameters.max_postcopy_bandwidth; + } + +-bool migrate_use_block(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; +-} +- + bool migrate_use_return_path(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index e2bb5b1e2f..d4b68b08a5 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -457,7 +457,6 @@ int migrate_multifd_zstd_level(void); + int migrate_use_tls(void); + uint64_t 
migrate_xbzrle_cache_size(void); + +-bool migrate_use_block(void); + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + bool migrate_use_return_path(void); +diff --git a/migration/options.c b/migration/options.c +index 25264c500e..fe1eadeed6 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,6 +33,15 @@ bool migrate_background_snapshot(void) + return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + ++bool migrate_block(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; ++} ++ + bool migrate_colo(void) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 8f76a88329..e985a5233e 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -18,6 +18,7 @@ + + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); ++bool migrate_block(void); + bool migrate_colo(void); + bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); +diff --git a/migration/savevm.c b/migration/savevm.c +index ebcf571e37..9671211339 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1612,7 +1612,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + return -EINVAL; + } + +- if (migrate_use_block()) { ++ if (migrate_block()) { + error_setg(errp, "Block migration and snapshots are incompatible"); + return -EINVAL; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch b/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch new file mode 100644 index 0000000..3f20289 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch @@ -0,0 +1,121 @@ +From def66503f4ccb97cf8029f88efe8e955edc8d32f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:49:47 +0100 +Subject: [PATCH 39/56] migration: Move migrate_use_block_incremental() to + option.c 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [38/50] 961fda6464df3384fbcee88c726b56a33c26e14e (peterx/qemu-kvm) + +To be consistent with every other parameter, rename to +migrate_block_incremental(). + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 6f8be7080a1f79bf3832cf798fba1697c409c597) +Signed-off-by: Peter Xu +--- + migration/block.c | 2 +- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + 5 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index f0977217cf..6d532ac7a2 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -417,7 +417,7 @@ static int init_blk_migration(QEMUFile *f) + bmds->bulk_completed = 0; + bmds->total_sectors = sectors; + bmds->completed_sectors = 0; +- bmds->shared_base = migrate_use_block_incremental(); ++ bmds->shared_base = migrate_block_incremental(); + + assert(i < num_bs); + bmds_bs[i].bmds = bmds; +diff --git a/migration/migration.c b/migration/migration.c +index 78bca9a93f..724e841eb9 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2157,7 +2157,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +- if (migrate_block() || migrate_use_block_incremental()) { ++ if (migrate_block() || migrate_block_incremental()) { + error_setg(errp, "Command options are incompatible with " + "current migration capabilities"); + return false; +@@ -2273,15 +2273,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && 
*s->parameters.tls_creds; + } + +-bool migrate_use_block_incremental(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.block_incremental; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index 8451e5f2fe..86051af132 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -451,7 +451,6 @@ bool migrate_postcopy(void); + + int migrate_use_tls(void); + +-bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + + uint64_t ram_get_total_transferred_pages(void); +diff --git a/migration/options.c b/migration/options.c +index 8d15be858c..2b6d88b4b9 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -463,6 +463,15 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + + /* parameters */ + ++bool migrate_block_incremental(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.block_incremental; ++} ++ + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index b24ee92283..96d5a8e6e4 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -45,6 +45,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); + + /* parameters */ + ++bool migrate_block_incremental(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch new file mode 100644 index 0000000..8b74183 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch @@ -0,0 +1,183 @@ +From ae183bfc9d7b001d3c4929556b095a76203bc08d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:03:48 +0100 +Subject: 
[PATCH 25/56] migration: Move migrate_use_compression() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [24/50] 126b865f51bd4a1ae3a46411fdcd59033bfc5376 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_compress() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit a7a94d14358dd7b445e20c2f26218ff987747642) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 16 ++++++++-------- + 5 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 59ee0ef82b..c6e32555a8 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1133,7 +1133,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->xbzrle_cache->overflow = xbzrle_counters.overflow; + } + +- if (migrate_use_compression()) { ++ if (migrate_compress()) { + info->compression = g_malloc0(sizeof(*info->compression)); + info->compression->pages = compression_counters.pages; + info->compression->busy = compression_counters.busy; +@@ -2522,15 +2522,6 @@ bool migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-bool migrate_use_compression(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; +-} +- + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 42f0c68b6f..77aa91c840 100644 +--- 
a/migration/migration.h ++++ b/migration/migration.h +@@ -471,7 +471,6 @@ bool migrate_use_return_path(void); + + uint64_t ram_get_total_transferred_pages(void); + +-bool migrate_use_compression(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +diff --git a/migration/options.c b/migration/options.c +index bd33c5da0a..fa7a13d3dc 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -39,6 +39,15 @@ bool migrate_colo(void) + return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + ++bool migrate_compress(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; ++} ++ + bool migrate_dirty_bitmaps(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 2a0ee61ff8..da2193fd94 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -19,6 +19,7 @@ + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); + bool migrate_colo(void); ++bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); +diff --git a/migration/ram.c b/migration/ram.c +index 912ccd89fa..d050d0c5fd 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -586,7 +586,7 @@ static void compress_threads_save_cleanup(void) + { + int i, thread_count; + +- if (!migrate_use_compression() || !comp_param) { ++ if (!migrate_compress() || !comp_param) { + return; + } + +@@ -625,7 +625,7 @@ static int compress_threads_save_setup(void) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + thread_count = migrate_compress_threads(); +@@ -1155,7 +1155,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + rs->xbzrle_bytes_prev = xbzrle_counters.bytes; + } + +- if (migrate_use_compression()) { ++ if (migrate_compress()) { + 
compression_counters.busy_rate = (double)(compression_counters.busy - + rs->compress_thread_busy_prev) / page_count; + rs->compress_thread_busy_prev = compression_counters.busy; +@@ -2270,7 +2270,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) + + static bool save_page_use_compression(RAMState *rs) + { +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return false; + } + +@@ -3734,7 +3734,7 @@ static int wait_for_decompress_done(void) + { + int idx, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + +@@ -3753,7 +3753,7 @@ static void compress_threads_load_cleanup(void) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return; + } + thread_count = migrate_decompress_threads(); +@@ -3794,7 +3794,7 @@ static int compress_threads_load_setup(QEMUFile *f) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + +@@ -4260,7 +4260,7 @@ static int ram_load_precopy(QEMUFile *f) + int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; + /* ADVISE is earlier, it shows the source has the postcopy capability on */ + bool postcopy_advised = migration_incoming_postcopy_advised(); +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch new file mode 100644 index 0000000..41e05c3 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch @@ -0,0 +1,120 @@ +From 940f1eb4347c72edb3e1abc02c8d7e7c95753dcf Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:08:09 +0100 +Subject: [PATCH 26/56] migration: Move migrate_use_events() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [25/50] b3acd949af2a0fae18061d360e4f51dc12d32c6c (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_events() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b890902c9c025b87d02e718eec3090fd3525ab18) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 5 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c6e32555a8..032cd5c050 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -353,7 +353,7 @@ void migration_incoming_state_destroy(void) + + static void migrate_generate_event(int new_state) + { +- if (migrate_use_events()) { ++ if (migrate_events()) { + qapi_event_send_migration(new_state); + } + } +@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_use_events(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; +-} +- + bool migrate_use_multifd(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 77aa91c840..bd06520c19 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -475,7 +475,6 @@ int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); +-bool migrate_use_events(void); + + /* Sending on the return path - generic and then for each message type */ + void 
migrate_send_rp_shut(MigrationIncomingState *mis, +diff --git a/migration/options.c b/migration/options.c +index fa7a13d3dc..d2219ee0e4 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -57,6 +57,15 @@ bool migrate_dirty_bitmaps(void) + return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + ++bool migrate_events(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; ++} ++ + bool migrate_ignore_shared(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index da2193fd94..b998024eba 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -21,6 +21,7 @@ bool migrate_background_snapshot(void); + bool migrate_colo(void); + bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); ++bool migrate_events(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); + bool migrate_pause_before_switchover(void); +diff --git a/migration/ram.c b/migration/ram.c +index d050d0c5fd..ee454a3849 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1246,7 +1246,7 @@ static void migration_bitmap_sync(RAMState *rs) + rs->num_dirty_pages_period = 0; + rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } +- if (migrate_use_events()) { ++ if (migrate_events()) { + uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); + qapi_event_send_migration_pass(generation); + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch new file mode 100644 index 0000000..97d6597 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch @@ -0,0 +1,247 @@ +From afd8fb766af2be5cff97753b026847b91b09a30e Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:10:29 +0100 +Subject: [PATCH 27/56] migration: Move migrate_use_multifd() to options.c +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [26/50] f2d72eae9cc80b2402ef613e809b40aa296d2e4c (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_multifd() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 51b07548f7c31793adc178c7460c5f4369733c61) +Signed-off-by: Peter Xu +--- + migration/migration.c | 19 +++++-------------- + migration/migration.h | 1 - + migration/multifd.c | 16 ++++++++-------- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + migration/socket.c | 2 +- + 7 files changed, 25 insertions(+), 25 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 032cd5c050..e1d7f25786 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -186,7 +186,7 @@ static void migrate_fd_cancel(MigrationState *s); + + static bool migration_needs_multiple_sockets(void) + { +- return migrate_use_multifd() || migrate_postcopy_preempt(); ++ return migrate_multifd() || migrate_postcopy_preempt(); + } + + static bool uri_supports_multi_channels(const char *uri) +@@ -732,7 +732,7 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp) + static bool migration_should_start_incoming(bool main_channel) + { + /* Multifd doesn't start unless all channels are established */ +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + return migration_has_all_channels(); + } + +@@ -759,7 +759,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + uint32_t channel_magic = 0; + int ret = 0; + +- if (migrate_use_multifd() && !migrate_postcopy_ram() && ++ if (migrate_multifd() && 
!migrate_postcopy_ram() && + qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { + /* + * With multiple channels, it is possible that we receive channels +@@ -798,7 +798,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + } else { + /* Multiple connections */ + assert(migration_needs_multiple_sockets()); +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + multifd_recv_new_channel(ioc, &local_err); + } else { + assert(migrate_postcopy_preempt()); +@@ -834,7 +834,7 @@ bool migration_has_all_channels(void) + return false; + } + +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + return multifd_recv_all_channels_created(); + } + +@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_use_multifd(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; +-} +- + int migrate_multifd_channels(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index bd06520c19..49c0e13f41 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,7 +449,6 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-bool migrate_use_multifd(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); +diff --git a/migration/multifd.c b/migration/multifd.c +index 903df2117b..6807328189 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -516,7 +516,7 @@ void multifd_save_cleanup(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + multifd_send_terminate_threads(NULL); +@@ -587,7 +587,7 @@ int multifd_send_sync_main(QEMUFile *f) + int i; + bool flush_zero_copy; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return 0; + } + if (multifd_send_state->pages->num) { +@@ -911,7 
+911,7 @@ int multifd_save_setup(Error **errp) + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + uint8_t i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return 0; + } + +@@ -1016,7 +1016,7 @@ static void multifd_recv_terminate_threads(Error *err) + + void multifd_load_shutdown(void) + { +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + multifd_recv_terminate_threads(NULL); + } + } +@@ -1025,7 +1025,7 @@ void multifd_load_cleanup(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + multifd_recv_terminate_threads(NULL); +@@ -1072,7 +1072,7 @@ void multifd_recv_sync_main(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -1170,7 +1170,7 @@ int multifd_load_setup(Error **errp) + * Return successfully if multiFD recv state is already initialised + * or multiFD is not enabled. + */ +- if (multifd_recv_state || !migrate_use_multifd()) { ++ if (multifd_recv_state || !migrate_multifd()) { + return 0; + } + +@@ -1216,7 +1216,7 @@ bool multifd_recv_all_channels_created(void) + { + int thread_count = migrate_multifd_channels(); + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return true; + } + +diff --git a/migration/options.c b/migration/options.c +index d2219ee0e4..58673fc101 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -84,6 +84,15 @@ bool migrate_late_block_activate(void) + return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + ++bool migrate_multifd(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; ++} ++ + bool migrate_pause_before_switchover(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index b998024eba..d07269ee38 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -24,6 +24,7 @@ bool 
migrate_dirty_bitmaps(void); + bool migrate_events(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); ++bool migrate_multifd(void); + bool migrate_pause_before_switchover(void); + bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); +diff --git a/migration/ram.c b/migration/ram.c +index ee454a3849..859dd7b63f 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2362,7 +2362,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + * if host page size == guest page size the dest guest during run may + * still see partially copied pages which is data corruption. + */ +- if (migrate_use_multifd() && !migration_in_postcopy()) { ++ if (migrate_multifd() && !migration_in_postcopy()) { + return ram_save_multifd_page(pss->pss_channel, block, offset); + } + +diff --git a/migration/socket.c b/migration/socket.c +index ebf9ac41af..f4835a256a 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -183,7 +183,7 @@ socket_start_incoming_migration_internal(SocketAddress *saddr, + + qio_net_listener_set_name(listener, "migration-socket-listener"); + +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + num = migrate_multifd_channels(); + } else if (migrate_postcopy_preempt()) { + num = RAM_CHANNEL_MAX; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch new file mode 100644 index 0000000..b250d40 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch @@ -0,0 +1,138 @@ +From 145b630767dbc7020ddf39b20075f4691f71321a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:25:47 +0100 +Subject: [PATCH 31/56] migration: Move migrate_use_return() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for 
postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [30/50] 5cc150188bcc61b69ea0844253597594ab18fc13 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_return_path() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 38ad1110e368bf91453c0abbd657224d57b65d47) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/rdma.c | 6 +++--- + 5 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 96f82bd165..f7facecd66 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) + return s->parameters.max_postcopy_bandwidth; + } + +-bool migrate_use_return_path(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; +-} +- + bool migrate_use_block_incremental(void) + { + MigrationState *s; +@@ -4175,7 +4166,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + * precopy, only if user specified "return-path" capability would + * QEMU uses the return path. 
+ */ +- if (migrate_postcopy_ram() || migrate_use_return_path()) { ++ if (migrate_postcopy_ram() || migrate_return_path()) { + if (open_return_path_on_source(s, !resume)) { + error_report("Unable to open return-path for postcopy"); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); +diff --git a/migration/migration.h b/migration/migration.h +index d4b68b08a5..24184622a8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -459,7 +459,6 @@ uint64_t migrate_xbzrle_cache_size(void); + + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); +-bool migrate_use_return_path(void); + + uint64_t ram_get_total_transferred_pages(void); + +diff --git a/migration/options.c b/migration/options.c +index fe1eadeed6..2003e413da 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -147,6 +147,15 @@ bool migrate_release_ram(void) + return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + ++bool migrate_return_path(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; ++} ++ + bool migrate_validate_uuid(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index e985a5233e..316efd1063 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -31,6 +31,7 @@ bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); ++bool migrate_return_path(void); + bool migrate_validate_uuid(void); + bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); +diff --git a/migration/rdma.c b/migration/rdma.c +index f35f021963..bf55e2f163 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -3373,7 +3373,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) + * initialize the RDMAContext for return path for postcopy after first + * connection request reached. 
+ */ +- if ((migrate_postcopy() || migrate_use_return_path()) ++ if ((migrate_postcopy() || migrate_return_path()) + && !rdma->is_return_path) { + rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL); + if (rdma_return_path == NULL) { +@@ -3456,7 +3456,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) + } + + /* Accept the second connection request for return path */ +- if ((migrate_postcopy() || migrate_use_return_path()) ++ if ((migrate_postcopy() || migrate_return_path()) + && !rdma->is_return_path) { + qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, + NULL, +@@ -4193,7 +4193,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + /* RDMA postcopy need a separate queue pair for return path */ +- if (migrate_postcopy() || migrate_use_return_path()) { ++ if (migrate_postcopy() || migrate_return_path()) { + rdma_return_path = qemu_rdma_data_init(host_port, errp); + + if (rdma_return_path == NULL) { +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch new file mode 100644 index 0000000..84734af --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch @@ -0,0 +1,134 @@ +From 2e2df63892e191e91216b8253171162f69b93387 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:41:23 +0100 +Subject: [PATCH 49/56] migration: Move migrate_use_tls() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [48/50] 314431b0f5e92d2211e58a8161f32d7b67d69e38 (peterx/qemu-kvm) + +Once there, rename it to migrate_tls() and make it return bool for +consistency. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy + +--- + +Fix typos found by fabiano + +(cherry picked from commit 10d4703be5d884bbbb6ecafe0e8bb270ad6ea937) +Signed-off-by: Peter Xu +--- + migration/migration.c | 9 --------- + migration/migration.h | 2 -- + migration/options.c | 11 ++++++++++- + migration/options.h | 1 + + migration/tls.c | 3 ++- + 5 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c2e109329d..22ef83c619 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2177,15 +2177,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-int migrate_use_tls(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.tls_creds && *s->parameters.tls_creds; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index dcf906868d..2b71df8617 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); + bool migration_in_postcopy(void); + MigrationState *migrate_get_current(void); + +-int migrate_use_tls(void); +- + uint64_t ram_get_total_transferred_pages(void); + + /* Sending on the return path - generic and then for each message type */ +diff --git a/migration/options.c b/migration/options.c +index 8e8753d9be..d4c0714683 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -214,6 +214,15 @@ bool migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + ++bool migrate_tls(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.tls_creds && *s->parameters.tls_creds; ++} ++ + typedef enum WriteTrackingSupport { + WT_SUPPORT_UNKNOWN = 0, + WT_SUPPORT_ABSENT, +@@ -363,7 +372,7 @@ bool migrate_caps_check(bool *old_caps, bool 
*new_caps, Error **errp) + new_caps[MIGRATION_CAPABILITY_COMPRESS] || + new_caps[MIGRATION_CAPABILITY_XBZRLE] || + migrate_multifd_compression() || +- migrate_use_tls())) { ++ migrate_tls())) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); + return false; +diff --git a/migration/options.h b/migration/options.h +index 1b78fa9f3d..13318a16c7 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -46,6 +46,7 @@ bool migrate_zero_copy_send(void); + */ + + bool migrate_postcopy(void); ++bool migrate_tls(void); + + /* capabilities helpers */ + +diff --git a/migration/tls.c b/migration/tls.c +index 4d2166a209..acd38e0b62 100644 +--- a/migration/tls.c ++++ b/migration/tls.c +@@ -22,6 +22,7 @@ + #include "channel.h" + #include "migration.h" + #include "tls.h" ++#include "options.h" + #include "crypto/tlscreds.h" + #include "qemu/error-report.h" + #include "qapi/error.h" +@@ -165,7 +166,7 @@ void migration_tls_channel_connect(MigrationState *s, + + bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc) + { +- if (!migrate_use_tls()) { ++ if (!migrate_tls()) { + return false; + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch new file mode 100644 index 0000000..e3a8bab --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch @@ -0,0 +1,156 @@ +From 2184f7dae0df5fa52deba2dc884e09c6bdbc7b5f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:20:13 +0100 +Subject: [PATCH 29/56] migration: Move migrate_use_xbzrle() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [28/50] 
fc8bee0f691a96e6bd0b41f2511abe507b81fea5 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_xbzrle() +to be consistent with all other capabilities. +We change the type to return bool also for consistency. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 87dca0c9bb63014ef73ad82f7aedea1cb5a822e7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 10 +++++----- + 5 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 1d63718e88..a4ede4294e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1122,7 +1122,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); + info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); + info->xbzrle_cache->bytes = xbzrle_counters.bytes; +@@ -2604,15 +2604,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && *s->parameters.tls_creds; + } + +-int migrate_use_xbzrle(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; +-} +- + uint64_t migrate_xbzrle_cache_size(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index c939f82d53..e2bb5b1e2f 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -455,7 +455,6 @@ int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + + int migrate_use_tls(void); +-int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + + bool 
migrate_use_block(void); +diff --git a/migration/options.c b/migration/options.c +index f357c99996..25264c500e 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -147,6 +147,15 @@ bool migrate_validate_uuid(void) + return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + ++bool migrate_xbzrle(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; ++} ++ + bool migrate_zero_blocks(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index ad22f4d24a..8f76a88329 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -31,6 +31,7 @@ bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); + bool migrate_validate_uuid(void); ++bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + +diff --git a/migration/ram.c b/migration/ram.c +index 859dd7b63f..4576d0d849 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -156,14 +156,14 @@ static struct { + + static void XBZRLE_cache_lock(void) + { +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + qemu_mutex_lock(&XBZRLE.lock); + } + } + + static void XBZRLE_cache_unlock(void) + { +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + qemu_mutex_unlock(&XBZRLE.lock); + } + } +@@ -1137,7 +1137,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + return; + } + +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + double encoded_size, unencoded_size; + + xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - +@@ -1626,7 +1626,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) + /* Flag that we've looped */ + pss->complete_round = true; + /* After the first round, enable XBZRLE. 
*/ +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + rs->xbzrle_enabled = true; + } + } +@@ -2979,7 +2979,7 @@ static int xbzrle_init(void) + { + Error *local_err = NULL; + +- if (!migrate_use_xbzrle()) { ++ if (!migrate_xbzrle()) { + return 0; + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch b/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch new file mode 100644 index 0000000..90031df --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch @@ -0,0 +1,167 @@ +From 6eb252887378d639ad2e90dd426a1812d4b72ca6 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:17:14 +0100 +Subject: [PATCH 28/56] migration: Move migrate_use_zero_copy_send() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [27/50] 5a4c2b5e75c62e0f60f9c4121a2756bd140a60d9 (peterx/qemu-kvm) + +Once that we are there, we rename the function to +migrate_zero_copy_send() to be consistent with all other capabilities. + +We can remove the CONFIG_LINUX guard. We already check that we can't +setup this capability in migrate_caps_check(). 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b4bc342c766640e0cb8a0b72f71e0ee5545fb790) +Signed-off-by: Peter Xu +--- + migration/migration.c | 13 +------------ + migration/migration.h | 5 ----- + migration/multifd.c | 8 ++++---- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/socket.c | 2 +- + 6 files changed, 16 insertions(+), 22 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index e1d7f25786..1d63718e88 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1609,7 +1609,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + } + + #ifdef CONFIG_LINUX +- if (migrate_use_zero_copy_send() && ++ if (migrate_zero_copy_send() && + ((params->has_multifd_compression && params->multifd_compression) || + (params->tls_creds && *params->tls_creds))) { + error_setg(errp, +@@ -2595,17 +2595,6 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + +-#ifdef CONFIG_LINUX +-bool migrate_use_zero_copy_send(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; +-} +-#endif +- + int migrate_use_tls(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 49c0e13f41..c939f82d53 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -454,11 +454,6 @@ MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + +-#ifdef CONFIG_LINUX +-bool migrate_use_zero_copy_send(void); +-#else +-#define migrate_use_zero_copy_send() (false) +-#endif + int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); +diff --git a/migration/multifd.c b/migration/multifd.c +index 6807328189..cce3ad6988 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -25,7 
+25,7 @@ + #include "trace.h" + #include "multifd.h" + #include "threadinfo.h" +- ++#include "options.h" + #include "qemu/yank.h" + #include "io/channel-socket.h" + #include "yank_functions.h" +@@ -608,7 +608,7 @@ int multifd_send_sync_main(QEMUFile *f) + * all the dirty bitmaps. + */ + +- flush_zero_copy = migrate_use_zero_copy_send(); ++ flush_zero_copy = migrate_zero_copy_send(); + + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -653,7 +653,7 @@ static void *multifd_send_thread(void *opaque) + MigrationThread *thread = NULL; + Error *local_err = NULL; + int ret = 0; +- bool use_zero_copy_send = migrate_use_zero_copy_send(); ++ bool use_zero_copy_send = migrate_zero_copy_send(); + + thread = MigrationThreadAdd(p->name, qemu_get_thread_id()); + +@@ -945,7 +945,7 @@ int multifd_save_setup(Error **errp) + p->page_size = qemu_target_page_size(); + p->page_count = page_count; + +- if (migrate_use_zero_copy_send()) { ++ if (migrate_zero_copy_send()) { + p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; + } else { + p->write_flags = 0; +diff --git a/migration/options.c b/migration/options.c +index 58673fc101..f357c99996 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -155,3 +155,12 @@ bool migrate_zero_blocks(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } ++ ++bool migrate_zero_copy_send(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; ++} +diff --git a/migration/options.h b/migration/options.h +index d07269ee38..ad22f4d24a 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -32,5 +32,6 @@ bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); + bool migrate_validate_uuid(void); + bool migrate_zero_blocks(void); ++bool migrate_zero_copy_send(void); + + #endif +diff --git a/migration/socket.c b/migration/socket.c +index f4835a256a..1b6f5baefb 100644 +--- 
a/migration/socket.c ++++ b/migration/socket.c +@@ -98,7 +98,7 @@ static void socket_outgoing_migration(QIOTask *task, + + trace_migration_socket_outgoing_connected(data->hostname); + +- if (migrate_use_zero_copy_send() && ++ if (migrate_zero_copy_send() && + !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg(&err, "Zero copy send feature not detected in host kernel"); + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch b/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch new file mode 100644 index 0000000..145b510 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch @@ -0,0 +1,409 @@ +From 0911e025a9dc8a0c85944ac11fb9df72e5ad0677 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 09/37] migration: Move migration_properties to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/28] ff07358afa0c90f13125b177b0e08c74ef1b9905 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit f9436522c8dd +Author: Juan Quintela +Date: Thu Mar 2 12:55:57 2023 +0100 + + migration: Move migration_properties to options.c + + Signed-off-by: Juan Quintela + Reviewed-by: Vladimir Sementsov-Ogievskiy + +Signed-off-by: Cédric Le Goater +--- + migration/migration.c | 157 ------------------------------------------ + migration/options.c | 155 +++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 7 ++ + 3 files changed, 162 insertions(+), 157 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 08f87f2b0e..1ac5f19bc2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -52,8 +52,6 @@ + 
#include "io/channel-tls.h" + #include "migration/colo.h" + #include "hw/boards.h" +-#include "hw/qdev-properties.h" +-#include "hw/qdev-properties-system.h" + #include "monitor/monitor.h" + #include "net/announce.h" + #include "qemu/queue.h" +@@ -65,51 +63,6 @@ + #include "sysemu/qtest.h" + #include "options.h" + +-#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ +- +-/* Time in milliseconds we are allowed to stop the source, +- * for sending the last part */ +-#define DEFAULT_MIGRATE_SET_DOWNTIME 300 +- +-/* Default compression thread count */ +-#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 +-/* Default decompression thread count, usually decompression is at +- * least 4 times as fast as compression.*/ +-#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 +-/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ +-#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 +-/* Define default autoconverge cpu throttle migration parameters */ +-#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 +-#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 +-#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 +-#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 +- +-/* Migration XBZRLE default cache size */ +-#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) +- +-/* The delay time (in ms) between two COLO checkpoints */ +-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) +-#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 +-#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE +-/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ +-#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 +-/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ +-#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 +- +-/* Background transfer rate for postcopy, 0 means unlimited, note +- * that page requests can still exceed this limit. 
+- */ +-#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 +- +-/* +- * Parameters for self_announce_delay giving a stream of RARP/ARP +- * packets after migration. +- */ +-#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 +-#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 +-#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 +-#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 +- + static NotifierList migration_state_notifiers = + NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); + +@@ -3336,116 +3289,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + s->migration_thread_running = true; + } + +-#define DEFINE_PROP_MIG_CAP(name, x) \ +- DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) +- +-static Property migration_properties[] = { +- DEFINE_PROP_BOOL("store-global-state", MigrationState, +- store_global_state, true), +- DEFINE_PROP_BOOL("send-configuration", MigrationState, +- send_configuration, true), +- DEFINE_PROP_BOOL("send-section-footer", MigrationState, +- send_section_footer, true), +- DEFINE_PROP_BOOL("decompress-error-check", MigrationState, +- decompress_error_check, true), +- DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, +- clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), +- DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, +- preempt_pre_7_2, false), +- +- /* Migration parameters */ +- DEFINE_PROP_UINT8("x-compress-level", MigrationState, +- parameters.compress_level, +- DEFAULT_MIGRATE_COMPRESS_LEVEL), +- DEFINE_PROP_UINT8("x-compress-threads", MigrationState, +- parameters.compress_threads, +- DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), +- DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, +- parameters.compress_wait_thread, true), +- DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, +- parameters.decompress_threads, +- DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), +- DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, +- parameters.throttle_trigger_threshold, +- DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), +- 
DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, +- parameters.cpu_throttle_initial, +- DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), +- DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, +- parameters.cpu_throttle_increment, +- DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), +- DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, +- parameters.cpu_throttle_tailslow, false), +- DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, +- parameters.max_bandwidth, MAX_THROTTLE), +- DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, +- parameters.downtime_limit, +- DEFAULT_MIGRATE_SET_DOWNTIME), +- DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, +- parameters.x_checkpoint_delay, +- DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), +- DEFINE_PROP_UINT8("multifd-channels", MigrationState, +- parameters.multifd_channels, +- DEFAULT_MIGRATE_MULTIFD_CHANNELS), +- DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, +- parameters.multifd_compression, +- DEFAULT_MIGRATE_MULTIFD_COMPRESSION), +- DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, +- parameters.multifd_zlib_level, +- DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), +- DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, +- parameters.multifd_zstd_level, +- DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +- DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, +- parameters.xbzrle_cache_size, +- DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +- DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, +- parameters.max_postcopy_bandwidth, +- DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), +- DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, +- parameters.max_cpu_throttle, +- DEFAULT_MIGRATE_MAX_CPU_THROTTLE), +- DEFINE_PROP_SIZE("announce-initial", MigrationState, +- parameters.announce_initial, +- DEFAULT_MIGRATE_ANNOUNCE_INITIAL), +- DEFINE_PROP_SIZE("announce-max", MigrationState, +- parameters.announce_max, +- DEFAULT_MIGRATE_ANNOUNCE_MAX), +- DEFINE_PROP_SIZE("announce-rounds", MigrationState, +- 
parameters.announce_rounds, +- DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), +- DEFINE_PROP_SIZE("announce-step", MigrationState, +- parameters.announce_step, +- DEFAULT_MIGRATE_ANNOUNCE_STEP), +- DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), +- DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), +- DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), +- +- /* Migration capabilities */ +- DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), +- DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), +- DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), +- DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), +- DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), +- DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), +- DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), +- DEFINE_PROP_MIG_CAP("x-postcopy-preempt", +- MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), +- DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), +- DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), +- DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), +- DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), +- DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), +- DEFINE_PROP_MIG_CAP("x-background-snapshot", +- MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), +-#ifdef CONFIG_LINUX +- DEFINE_PROP_MIG_CAP("x-zero-copy-send", +- MIGRATION_CAPABILITY_ZERO_COPY_SEND), +-#endif +- +- DEFINE_PROP_END_OF_LIST(), +-}; +- + static void migration_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +diff --git a/migration/options.c b/migration/options.c +index bcfe244fa9..a76984276d 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -31,6 +31,161 @@ + #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 + #define MAX_MIGRATE_DOWNTIME 
(MAX_MIGRATE_DOWNTIME_SECONDS * 1000) + ++#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ ++ ++/* Time in milliseconds we are allowed to stop the source, ++ * for sending the last part */ ++#define DEFAULT_MIGRATE_SET_DOWNTIME 300 ++ ++/* Default compression thread count */ ++#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 ++/* Default decompression thread count, usually decompression is at ++ * least 4 times as fast as compression.*/ ++#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 ++/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ ++#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 ++/* Define default autoconverge cpu throttle migration parameters */ ++#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 ++#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 ++#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 ++#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 ++ ++/* Migration XBZRLE default cache size */ ++#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) ++ ++/* The delay time (in ms) between two COLO checkpoints */ ++#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) ++#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 ++#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE ++/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ ++#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 ++/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ ++#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 ++ ++/* Background transfer rate for postcopy, 0 means unlimited, note ++ * that page requests can still exceed this limit. ++ */ ++#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 ++ ++/* ++ * Parameters for self_announce_delay giving a stream of RARP/ARP ++ * packets after migration. 
++ */ ++#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 ++#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 ++#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 ++#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 ++ ++#define DEFINE_PROP_MIG_CAP(name, x) \ ++ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) ++ ++Property migration_properties[] = { ++ DEFINE_PROP_BOOL("store-global-state", MigrationState, ++ store_global_state, true), ++ DEFINE_PROP_BOOL("send-configuration", MigrationState, ++ send_configuration, true), ++ DEFINE_PROP_BOOL("send-section-footer", MigrationState, ++ send_section_footer, true), ++ DEFINE_PROP_BOOL("decompress-error-check", MigrationState, ++ decompress_error_check, true), ++ DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, ++ clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), ++ DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, ++ preempt_pre_7_2, false), ++ ++ /* Migration parameters */ ++ DEFINE_PROP_UINT8("x-compress-level", MigrationState, ++ parameters.compress_level, ++ DEFAULT_MIGRATE_COMPRESS_LEVEL), ++ DEFINE_PROP_UINT8("x-compress-threads", MigrationState, ++ parameters.compress_threads, ++ DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), ++ DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, ++ parameters.compress_wait_thread, true), ++ DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, ++ parameters.decompress_threads, ++ DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), ++ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, ++ parameters.throttle_trigger_threshold, ++ DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), ++ DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, ++ parameters.cpu_throttle_initial, ++ DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), ++ DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, ++ parameters.cpu_throttle_increment, ++ DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), ++ DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, ++ parameters.cpu_throttle_tailslow, false), ++ 
DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, ++ parameters.max_bandwidth, MAX_THROTTLE), ++ DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, ++ parameters.downtime_limit, ++ DEFAULT_MIGRATE_SET_DOWNTIME), ++ DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, ++ parameters.x_checkpoint_delay, ++ DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), ++ DEFINE_PROP_UINT8("multifd-channels", MigrationState, ++ parameters.multifd_channels, ++ DEFAULT_MIGRATE_MULTIFD_CHANNELS), ++ DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, ++ parameters.multifd_compression, ++ DEFAULT_MIGRATE_MULTIFD_COMPRESSION), ++ DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, ++ parameters.multifd_zlib_level, ++ DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), ++ DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, ++ parameters.multifd_zstd_level, ++ DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), ++ DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, ++ parameters.xbzrle_cache_size, ++ DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), ++ DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, ++ parameters.max_postcopy_bandwidth, ++ DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), ++ DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, ++ parameters.max_cpu_throttle, ++ DEFAULT_MIGRATE_MAX_CPU_THROTTLE), ++ DEFINE_PROP_SIZE("announce-initial", MigrationState, ++ parameters.announce_initial, ++ DEFAULT_MIGRATE_ANNOUNCE_INITIAL), ++ DEFINE_PROP_SIZE("announce-max", MigrationState, ++ parameters.announce_max, ++ DEFAULT_MIGRATE_ANNOUNCE_MAX), ++ DEFINE_PROP_SIZE("announce-rounds", MigrationState, ++ parameters.announce_rounds, ++ DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), ++ DEFINE_PROP_SIZE("announce-step", MigrationState, ++ parameters.announce_step, ++ DEFAULT_MIGRATE_ANNOUNCE_STEP), ++ DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), ++ DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), ++ DEFINE_PROP_STRING("tls-authz", MigrationState, 
parameters.tls_authz), ++ ++ /* Migration capabilities */ ++ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), ++ DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), ++ DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), ++ DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), ++ DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), ++ DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), ++ DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), ++ DEFINE_PROP_MIG_CAP("x-postcopy-preempt", ++ MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), ++ DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), ++ DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), ++ DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), ++ DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), ++ DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), ++ DEFINE_PROP_MIG_CAP("x-background-snapshot", ++ MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_MIG_CAP("x-zero-copy-send", ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND), ++#endif ++ ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ + bool migrate_auto_converge(void) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 89067e59a0..7b0f7245ad 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -14,6 +14,9 @@ + #ifndef QEMU_MIGRATION_OPTIONS_H + #define QEMU_MIGRATION_OPTIONS_H + ++#include "hw/qdev-properties.h" ++#include "hw/qdev-properties-system.h" ++ + /* constants */ + + /* Amount of time to allocate to each "chunk" of bandwidth-throttled +@@ -21,6 +24,10 @@ + #define BUFFER_DELAY 100 + #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) + ++/* migration properties */ ++ ++extern Property migration_properties[]; ++ + /* capabilities */ + + bool migrate_auto_converge(void); +-- +2.39.3 + diff --git 
a/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch b/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch new file mode 100644 index 0000000..10e5fe7 --- /dev/null +++ b/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch @@ -0,0 +1,94 @@ +From a90cae0dae6382cc1af63dfed8a51a3a27dc4bae Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 11 Sep 2023 16:10:19 +0200 +Subject: [PATCH 2/4] migration: Move more initializations to migrate_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled +RH-Bugzilla: 2229868 +RH-Acked-by: Alex Williamson +RH-Acked-by: Peter Xu +RH-Commit: [2/4] 3706a3308c33046e2658ee511b364087e202708e + +Bugzilla: https://bugzilla.redhat.com/2229868 + +commit f543aa222da183ac37424d1ea3a65e5fb6202732 +Author: Avihai Horon +Date: Wed Sep 6 18:08:50 2023 +0300 + + migration: Move more initializations to migrate_init() + + Initialization of mig_stats, compression_counters and VFIO bytes + transferred is hard-coded in migration code path and snapshot code path. + + Make the code cleaner by initializing them in migrate_init(). 
+ + Suggested-by: Cédric Le Goater + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c, migration/savevm.c + context changes in migrate_prepare() and qemu_savevm_state() due + to missing commit aff3f6606d14 ("migration: Rename ram_counters + to mig_stats") + +Signed-off-by: Cédric Le Goater +--- + migration/migration.c | 14 +++++++------- + migration/savevm.c | 3 --- + 2 files changed, 7 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 5aa9e5dada..a85c8936d9 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1422,6 +1422,13 @@ void migrate_init(MigrationState *s) + s->iteration_initial_bytes = 0; + s->threshold_size = 0; + s->switchover_acked = false; ++ /* ++ * set mig_stats compression_counters memory to zero for a ++ * new migration ++ */ ++ memset(&ram_counters, 0, sizeof(ram_counters)); ++ memset(&compression_counters, 0, sizeof(compression_counters)); ++ migration_reset_vfio_bytes_transferred(); + } + + int migrate_add_blocker_internal(Error *reason, Error **errp) +@@ -1632,13 +1639,6 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + } + + migrate_init(s); +- /* +- * set ram_counters compression_counters memory to zero for a +- * new migration +- */ +- memset(&ram_counters, 0, sizeof(ram_counters)); +- memset(&compression_counters, 0, sizeof(compression_counters)); +- migration_reset_vfio_bytes_transferred(); + + return true; + } +diff --git a/migration/savevm.c b/migration/savevm.c +index 05db79bfad..13c1a9afa1 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1618,9 +1618,6 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + } + + migrate_init(ms); +- memset(&ram_counters, 0, sizeof(ram_counters)); +- memset(&compression_counters, 0, sizeof(compression_counters)); +- migration_reset_vfio_bytes_transferred(); + ms->to_dst_file = f; + + 
qemu_mutex_unlock_iothread(); +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch b/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch new file mode 100644 index 0000000..ad4510b --- /dev/null +++ b/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch @@ -0,0 +1,317 @@ +From d5ea4c82c44a59ac70313eb1eac77999ca5fde36 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:39:03 +0100 +Subject: [PATCH 37/56] migration: Move parameters functions to option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [36/50] 2540921028025504723e762c0a1d2f295ac5a6d1 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 1dfc4b9e19bcf1ad41a1be9ac82db35b9647c3c1) +Signed-off-by: Peter Xu +--- + migration/migration.c | 91 --------------------------------------- + migration/migration.h | 11 ----- + migration/multifd-zlib.c | 1 + + migration/multifd-zstd.c | 1 + + migration/options.c | 93 ++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 13 ++++++ + 6 files changed, 108 insertions(+), 102 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 880a51210e..7f2e770deb 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2264,79 +2264,6 @@ bool migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-int migrate_compress_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_level; +-} +- +-int migrate_compress_threads(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_threads; 
+-} +- +-int migrate_compress_wait_thread(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_wait_thread; +-} +- +-int migrate_decompress_threads(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.decompress_threads; +-} +- +-int migrate_multifd_channels(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_channels; +-} +- +-MultiFDCompression migrate_multifd_compression(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); +- return s->parameters.multifd_compression; +-} +- +-int migrate_multifd_zlib_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_zlib_level; +-} +- +-int migrate_multifd_zstd_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_zstd_level; +-} +- + int migrate_use_tls(void) + { + MigrationState *s; +@@ -2346,24 +2273,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && *s->parameters.tls_creds; + } + +-uint64_t migrate_xbzrle_cache_size(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.xbzrle_cache_size; +-} +- +-static int64_t migrate_max_postcopy_bandwidth(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.max_postcopy_bandwidth; +-} +- + bool migrate_use_block_incremental(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 24184622a8..8451e5f2fe 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,24 +449,13 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-int migrate_multifd_channels(void); +-MultiFDCompression migrate_multifd_compression(void); +-int migrate_multifd_zlib_level(void); +-int 
migrate_multifd_zstd_level(void); +- + int migrate_use_tls(void); +-uint64_t migrate_xbzrle_cache_size(void); + + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + + uint64_t ram_get_total_transferred_pages(void); + +-int migrate_compress_level(void); +-int migrate_compress_threads(void); +-int migrate_compress_wait_thread(void); +-int migrate_decompress_threads(void); +- + /* Sending on the return path - generic and then for each message type */ + void migrate_send_rp_shut(MigrationIncomingState *mis, + uint32_t value); +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 37770248e1..81701250ad 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -18,6 +18,7 @@ + #include "qapi/error.h" + #include "migration.h" + #include "trace.h" ++#include "options.h" + #include "multifd.h" + + struct zlib_data { +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index f4a8e1ed1f..d1d29e76cc 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -18,6 +18,7 @@ + #include "qapi/error.h" + #include "migration.h" + #include "trace.h" ++#include "options.h" + #include "multifd.h" + + struct zstd_data { +diff --git a/migration/options.c b/migration/options.c +index f3b2d6e482..8d15be858c 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -460,3 +460,96 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + s->capabilities[cap->value->capability] = cap->value->state; + } + } ++ ++/* parameters */ ++ ++int migrate_compress_level(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_level; ++} ++ ++int migrate_compress_threads(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_threads; ++} ++ ++int migrate_compress_wait_thread(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_wait_thread; ++} ++ 
++int migrate_decompress_threads(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.decompress_threads; ++} ++ ++int64_t migrate_max_postcopy_bandwidth(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_postcopy_bandwidth; ++} ++ ++int migrate_multifd_channels(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_channels; ++} ++ ++MultiFDCompression migrate_multifd_compression(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); ++ return s->parameters.multifd_compression; ++} ++ ++int migrate_multifd_zlib_level(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_zlib_level; ++} ++ ++int migrate_multifd_zstd_level(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_zstd_level; ++} ++ ++uint64_t migrate_xbzrle_cache_size(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.xbzrle_cache_size; ++} +diff --git a/migration/options.h b/migration/options.h +index 5979e4ff90..b24ee92283 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -43,4 +43,17 @@ bool migrate_zero_copy_send(void); + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); + bool migrate_cap_set(int cap, bool value, Error **errp); + ++/* parameters */ ++ ++int migrate_compress_level(void); ++int migrate_compress_threads(void); ++int migrate_compress_wait_thread(void); ++int migrate_decompress_threads(void); ++int64_t migrate_max_postcopy_bandwidth(void); ++int migrate_multifd_channels(void); ++MultiFDCompression migrate_multifd_compression(void); ++int migrate_multifd_zlib_level(void); ++int migrate_multifd_zstd_level(void); ++uint64_t migrate_xbzrle_cache_size(void); ++ + #endif +-- +2.39.1 + diff --git 
a/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch b/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch new file mode 100644 index 0000000..10f185b --- /dev/null +++ b/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch @@ -0,0 +1,100 @@ +From d967ec22cdb20e0a846f050a2bc7bd4caa87940d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:18:02 +0100 +Subject: [PATCH 35/56] migration: Move qmp_migrate_set_capabilities() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [34/50] 16b62ca7e06c58d71389c449dc19c11939dd0882 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 45c1de13f09b1fd4ea26f54e6da12aae52f34cb8) +Signed-off-by: Peter Xu +--- + migration/migration.c | 26 -------------------------- + migration/options.c | 26 ++++++++++++++++++++++++++ + 2 files changed, 26 insertions(+), 26 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 3dc8ee4875..369cd91796 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1222,32 +1222,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) + return info; + } + +-void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, +- Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- MigrationCapabilityStatusList *cap; +- bool new_caps[MIGRATION_CAPABILITY__MAX]; +- +- if (migration_is_running(s->state)) { +- error_setg(errp, QERR_MIGRATION_ACTIVE); +- return; +- } +- +- memcpy(new_caps, s->capabilities, sizeof(new_caps)); +- for (cap = params; cap; cap = cap->next) { +- new_caps[cap->value->capability] = cap->value->state; +- } +- +- 
if (!migrate_caps_check(s->capabilities, new_caps, errp)) { +- return; +- } +- +- for (cap = params; cap; cap = cap->next) { +- s->capabilities[cap->value->capability] = cap->value->state; +- } +-} +- + /* + * Check whether the parameters are valid. Error will be put into errp + * (if provided). Return true if valid, otherwise false. +diff --git a/migration/options.c b/migration/options.c +index ff621bdeb3..4cbe77e35a 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -413,3 +413,29 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + + return head; + } ++ ++void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, ++ Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ MigrationCapabilityStatusList *cap; ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; ++ ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return; ++ } ++ ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ for (cap = params; cap; cap = cap->next) { ++ new_caps[cap->value->capability] = cap->value->state; ++ } ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return; ++ } ++ ++ for (cap = params; cap; cap = cap->next) { ++ s->capabilities[cap->value->capability] = cap->value->state; ++ } ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch b/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch new file mode 100644 index 0000000..3685a33 --- /dev/null +++ b/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch @@ -0,0 +1,943 @@ +From 944bf4759d1279c342ddd29c47d47c9670b64625 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:13:16 +0100 +Subject: [PATCH 50/56] migration: Move qmp_migrate_set_parameters() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: 
Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [49/50] b55f7afe868e117d4212f1518b9a37514cc99b33 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 09d6c9658474e8573c5ada58dca8b20fe47dd99e) +Signed-off-by: Peter Xu +--- + migration/migration.c | 420 ------------------------------------------ + migration/options.c | 418 +++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 11 ++ + 3 files changed, 429 insertions(+), 420 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 22ef83c619..08f87f2b0e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -67,19 +67,10 @@ + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +-/* Amount of time to allocate to each "chunk" of bandwidth-throttled +- * data. */ +-#define BUFFER_DELAY 100 +-#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) +- + /* Time in milliseconds we are allowed to stop the source, + * for sending the last part */ + #define DEFAULT_MIGRATE_SET_DOWNTIME 300 + +-/* Maximum migrate downtime set to 2000 seconds */ +-#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 +-#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) +- + /* Default compression thread count */ + #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 + /* Default decompression thread count, usually decompression is at +@@ -1140,417 +1131,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) + return info; + } + +-/* +- * Check whether the parameters are valid. Error will be put into errp +- * (if provided). Return true if valid, otherwise false. 
+- */ +-static bool migrate_params_check(MigrationParameters *params, Error **errp) +-{ +- if (params->has_compress_level && +- (params->compress_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", +- "a value between 0 and 9"); +- return false; +- } +- +- if (params->has_compress_threads && (params->compress_threads < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "compress_threads", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_decompress_threads && (params->decompress_threads < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "decompress_threads", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_throttle_trigger_threshold && +- (params->throttle_trigger_threshold < 1 || +- params->throttle_trigger_threshold > 100)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "throttle_trigger_threshold", +- "an integer in the range of 1 to 100"); +- return false; +- } +- +- if (params->has_cpu_throttle_initial && +- (params->cpu_throttle_initial < 1 || +- params->cpu_throttle_initial > 99)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "cpu_throttle_initial", +- "an integer in the range of 1 to 99"); +- return false; +- } +- +- if (params->has_cpu_throttle_increment && +- (params->cpu_throttle_increment < 1 || +- params->cpu_throttle_increment > 99)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "cpu_throttle_increment", +- "an integer in the range of 1 to 99"); +- return false; +- } +- +- if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "max_bandwidth", +- "an integer in the range of 0 to "stringify(SIZE_MAX) +- " bytes/second"); +- return false; +- } +- +- if (params->has_downtime_limit && +- (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "downtime_limit", +- "an integer in the range of 0 to " +- 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); +- return false; +- } +- +- /* x_checkpoint_delay is now always positive */ +- +- if (params->has_multifd_channels && (params->multifd_channels < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "multifd_channels", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_multifd_zlib_level && +- (params->multifd_zlib_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", +- "a value between 0 and 9"); +- return false; +- } +- +- if (params->has_multifd_zstd_level && +- (params->multifd_zstd_level > 20)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", +- "a value between 0 and 20"); +- return false; +- } +- +- if (params->has_xbzrle_cache_size && +- (params->xbzrle_cache_size < qemu_target_page_size() || +- !is_power_of_2(params->xbzrle_cache_size))) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "xbzrle_cache_size", +- "a power of two no less than the target page size"); +- return false; +- } +- +- if (params->has_max_cpu_throttle && +- (params->max_cpu_throttle < params->cpu_throttle_initial || +- params->max_cpu_throttle > 99)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "max_cpu_throttle", +- "an integer in the range of cpu_throttle_initial to 99"); +- return false; +- } +- +- if (params->has_announce_initial && +- params->announce_initial > 100000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_initial", +- "a value between 0 and 100000"); +- return false; +- } +- if (params->has_announce_max && +- params->announce_max > 100000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_max", +- "a value between 0 and 100000"); +- return false; +- } +- if (params->has_announce_rounds && +- params->announce_rounds > 1000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_rounds", +- "a value between 0 and 1000"); +- return false; +- } +- if (params->has_announce_step && +- 
(params->announce_step < 1 || +- params->announce_step > 10000)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_step", +- "a value between 0 and 10000"); +- return false; +- } +- +- if (params->has_block_bitmap_mapping && +- !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { +- error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); +- return false; +- } +- +-#ifdef CONFIG_LINUX +- if (migrate_zero_copy_send() && +- ((params->has_multifd_compression && params->multifd_compression) || +- (params->tls_creds && *params->tls_creds))) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#endif +- +- return true; +-} +- +-static void migrate_params_test_apply(MigrateSetParameters *params, +- MigrationParameters *dest) +-{ +- *dest = migrate_get_current()->parameters; +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- +- if (params->has_compress_level) { +- dest->compress_level = params->compress_level; +- } +- +- if (params->has_compress_threads) { +- dest->compress_threads = params->compress_threads; +- } +- +- if (params->has_compress_wait_thread) { +- dest->compress_wait_thread = params->compress_wait_thread; +- } +- +- if (params->has_decompress_threads) { +- dest->decompress_threads = params->decompress_threads; +- } +- +- if (params->has_throttle_trigger_threshold) { +- dest->throttle_trigger_threshold = params->throttle_trigger_threshold; +- } +- +- if (params->has_cpu_throttle_initial) { +- dest->cpu_throttle_initial = params->cpu_throttle_initial; +- } +- +- if (params->has_cpu_throttle_increment) { +- dest->cpu_throttle_increment = params->cpu_throttle_increment; +- } +- +- if (params->has_cpu_throttle_tailslow) { +- dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; +- } +- +- if (params->tls_creds) { +- assert(params->tls_creds->type == QTYPE_QSTRING); +- dest->tls_creds = params->tls_creds->u.s; +- } +- 
+- if (params->tls_hostname) { +- assert(params->tls_hostname->type == QTYPE_QSTRING); +- dest->tls_hostname = params->tls_hostname->u.s; +- } +- +- if (params->has_max_bandwidth) { +- dest->max_bandwidth = params->max_bandwidth; +- } +- +- if (params->has_downtime_limit) { +- dest->downtime_limit = params->downtime_limit; +- } +- +- if (params->has_x_checkpoint_delay) { +- dest->x_checkpoint_delay = params->x_checkpoint_delay; +- } +- +- if (params->has_block_incremental) { +- dest->block_incremental = params->block_incremental; +- } +- if (params->has_multifd_channels) { +- dest->multifd_channels = params->multifd_channels; +- } +- if (params->has_multifd_compression) { +- dest->multifd_compression = params->multifd_compression; +- } +- if (params->has_xbzrle_cache_size) { +- dest->xbzrle_cache_size = params->xbzrle_cache_size; +- } +- if (params->has_max_postcopy_bandwidth) { +- dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; +- } +- if (params->has_max_cpu_throttle) { +- dest->max_cpu_throttle = params->max_cpu_throttle; +- } +- if (params->has_announce_initial) { +- dest->announce_initial = params->announce_initial; +- } +- if (params->has_announce_max) { +- dest->announce_max = params->announce_max; +- } +- if (params->has_announce_rounds) { +- dest->announce_rounds = params->announce_rounds; +- } +- if (params->has_announce_step) { +- dest->announce_step = params->announce_step; +- } +- +- if (params->has_block_bitmap_mapping) { +- dest->has_block_bitmap_mapping = true; +- dest->block_bitmap_mapping = params->block_bitmap_mapping; +- } +-} +- +-static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- +- if (params->has_compress_level) { +- s->parameters.compress_level = params->compress_level; +- } +- +- if (params->has_compress_threads) { +- s->parameters.compress_threads = params->compress_threads; +- } 
+- +- if (params->has_compress_wait_thread) { +- s->parameters.compress_wait_thread = params->compress_wait_thread; +- } +- +- if (params->has_decompress_threads) { +- s->parameters.decompress_threads = params->decompress_threads; +- } +- +- if (params->has_throttle_trigger_threshold) { +- s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; +- } +- +- if (params->has_cpu_throttle_initial) { +- s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; +- } +- +- if (params->has_cpu_throttle_increment) { +- s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; +- } +- +- if (params->has_cpu_throttle_tailslow) { +- s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; +- } +- +- if (params->tls_creds) { +- g_free(s->parameters.tls_creds); +- assert(params->tls_creds->type == QTYPE_QSTRING); +- s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); +- } +- +- if (params->tls_hostname) { +- g_free(s->parameters.tls_hostname); +- assert(params->tls_hostname->type == QTYPE_QSTRING); +- s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); +- } +- +- if (params->tls_authz) { +- g_free(s->parameters.tls_authz); +- assert(params->tls_authz->type == QTYPE_QSTRING); +- s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); +- } +- +- if (params->has_max_bandwidth) { +- s->parameters.max_bandwidth = params->max_bandwidth; +- if (s->to_dst_file && !migration_in_postcopy()) { +- qemu_file_set_rate_limit(s->to_dst_file, +- s->parameters.max_bandwidth / XFER_LIMIT_RATIO); +- } +- } +- +- if (params->has_downtime_limit) { +- s->parameters.downtime_limit = params->downtime_limit; +- } +- +- if (params->has_x_checkpoint_delay) { +- s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; +- if (migration_in_colo_state()) { +- colo_checkpoint_notify(s); +- } +- } +- +- if (params->has_block_incremental) { +- s->parameters.block_incremental = params->block_incremental; +- } +- if 
(params->has_multifd_channels) { +- s->parameters.multifd_channels = params->multifd_channels; +- } +- if (params->has_multifd_compression) { +- s->parameters.multifd_compression = params->multifd_compression; +- } +- if (params->has_xbzrle_cache_size) { +- s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; +- xbzrle_cache_resize(params->xbzrle_cache_size, errp); +- } +- if (params->has_max_postcopy_bandwidth) { +- s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; +- if (s->to_dst_file && migration_in_postcopy()) { +- qemu_file_set_rate_limit(s->to_dst_file, +- s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); +- } +- } +- if (params->has_max_cpu_throttle) { +- s->parameters.max_cpu_throttle = params->max_cpu_throttle; +- } +- if (params->has_announce_initial) { +- s->parameters.announce_initial = params->announce_initial; +- } +- if (params->has_announce_max) { +- s->parameters.announce_max = params->announce_max; +- } +- if (params->has_announce_rounds) { +- s->parameters.announce_rounds = params->announce_rounds; +- } +- if (params->has_announce_step) { +- s->parameters.announce_step = params->announce_step; +- } +- +- if (params->has_block_bitmap_mapping) { +- qapi_free_BitmapMigrationNodeAliasList( +- s->parameters.block_bitmap_mapping); +- +- s->parameters.has_block_bitmap_mapping = true; +- s->parameters.block_bitmap_mapping = +- QAPI_CLONE(BitmapMigrationNodeAliasList, +- params->block_bitmap_mapping); +- } +-} +- +-void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) +-{ +- MigrationParameters tmp; +- +- /* TODO Rewrite "" to null instead */ +- if (params->tls_creds +- && params->tls_creds->type == QTYPE_QNULL) { +- qobject_unref(params->tls_creds->u.n); +- params->tls_creds->type = QTYPE_QSTRING; +- params->tls_creds->u.s = strdup(""); +- } +- /* TODO Rewrite "" to null instead */ +- if (params->tls_hostname +- && params->tls_hostname->type == QTYPE_QNULL) { +- 
qobject_unref(params->tls_hostname->u.n); +- params->tls_hostname->type = QTYPE_QSTRING; +- params->tls_hostname->u.s = strdup(""); +- } +- +- migrate_params_test_apply(params, &tmp); +- +- if (!migrate_params_check(&tmp, errp)) { +- /* Invalid parameter */ +- return; +- } +- +- migrate_params_apply(params, errp); +-} +- +- + void qmp_migrate_start_postcopy(Error **errp) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.c b/migration/options.c +index d4c0714683..4701c75a4d 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,17 +12,25 @@ + */ + + #include "qemu/osdep.h" ++#include "exec/target_page.h" + #include "qapi/clone-visitor.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qerror.h" ++#include "qapi/qmp/qnull.h" + #include "sysemu/runstate.h" ++#include "migration/colo.h" + #include "migration/misc.h" + #include "migration.h" ++#include "qemu-file.h" + #include "ram.h" + #include "options.h" + ++/* Maximum migrate downtime set to 2000 seconds */ ++#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 ++#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) ++ + bool migrate_auto_converge(void) + { + MigrationState *s; +@@ -729,3 +737,413 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + + return params; + } ++ ++/* ++ * Check whether the parameters are valid. Error will be put into errp ++ * (if provided). Return true if valid, otherwise false. 
++ */ ++bool migrate_params_check(MigrationParameters *params, Error **errp) ++{ ++ if (params->has_compress_level && ++ (params->compress_level > 9)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value between 0 and 9"); ++ return false; ++ } ++ ++ if (params->has_compress_threads && (params->compress_threads < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "compress_threads", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_decompress_threads && (params->decompress_threads < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "decompress_threads", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_throttle_trigger_threshold && ++ (params->throttle_trigger_threshold < 1 || ++ params->throttle_trigger_threshold > 100)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "throttle_trigger_threshold", ++ "an integer in the range of 1 to 100"); ++ return false; ++ } ++ ++ if (params->has_cpu_throttle_initial && ++ (params->cpu_throttle_initial < 1 || ++ params->cpu_throttle_initial > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "cpu_throttle_initial", ++ "an integer in the range of 1 to 99"); ++ return false; ++ } ++ ++ if (params->has_cpu_throttle_increment && ++ (params->cpu_throttle_increment < 1 || ++ params->cpu_throttle_increment > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "cpu_throttle_increment", ++ "an integer in the range of 1 to 99"); ++ return false; ++ } ++ ++ if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "max_bandwidth", ++ "an integer in the range of 0 to "stringify(SIZE_MAX) ++ " bytes/second"); ++ return false; ++ } ++ ++ if (params->has_downtime_limit && ++ (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "downtime_limit", ++ "an integer in the range of 0 to " ++ 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); ++ return false; ++ } ++ ++ /* x_checkpoint_delay is now always positive */ ++ ++ if (params->has_multifd_channels && (params->multifd_channels < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "multifd_channels", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_multifd_zlib_level && ++ (params->multifd_zlib_level > 9)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", ++ "a value between 0 and 9"); ++ return false; ++ } ++ ++ if (params->has_multifd_zstd_level && ++ (params->multifd_zstd_level > 20)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", ++ "a value between 0 and 20"); ++ return false; ++ } ++ ++ if (params->has_xbzrle_cache_size && ++ (params->xbzrle_cache_size < qemu_target_page_size() || ++ !is_power_of_2(params->xbzrle_cache_size))) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "xbzrle_cache_size", ++ "a power of two no less than the target page size"); ++ return false; ++ } ++ ++ if (params->has_max_cpu_throttle && ++ (params->max_cpu_throttle < params->cpu_throttle_initial || ++ params->max_cpu_throttle > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "max_cpu_throttle", ++ "an integer in the range of cpu_throttle_initial to 99"); ++ return false; ++ } ++ ++ if (params->has_announce_initial && ++ params->announce_initial > 100000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_initial", ++ "a value between 0 and 100000"); ++ return false; ++ } ++ if (params->has_announce_max && ++ params->announce_max > 100000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_max", ++ "a value between 0 and 100000"); ++ return false; ++ } ++ if (params->has_announce_rounds && ++ params->announce_rounds > 1000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_rounds", ++ "a value between 0 and 1000"); ++ return false; ++ } ++ if (params->has_announce_step && ++ 
(params->announce_step < 1 || ++ params->announce_step > 10000)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_step", ++ "a value between 0 and 10000"); ++ return false; ++ } ++ ++ if (params->has_block_bitmap_mapping && ++ !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { ++ error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); ++ return false; ++ } ++ ++#ifdef CONFIG_LINUX ++ if (migrate_zero_copy_send() && ++ ((params->has_multifd_compression && params->multifd_compression) || ++ (params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif ++ ++ return true; ++} ++ ++static void migrate_params_test_apply(MigrateSetParameters *params, ++ MigrationParameters *dest) ++{ ++ *dest = migrate_get_current()->parameters; ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ ++ if (params->has_compress_level) { ++ dest->compress_level = params->compress_level; ++ } ++ ++ if (params->has_compress_threads) { ++ dest->compress_threads = params->compress_threads; ++ } ++ ++ if (params->has_compress_wait_thread) { ++ dest->compress_wait_thread = params->compress_wait_thread; ++ } ++ ++ if (params->has_decompress_threads) { ++ dest->decompress_threads = params->decompress_threads; ++ } ++ ++ if (params->has_throttle_trigger_threshold) { ++ dest->throttle_trigger_threshold = params->throttle_trigger_threshold; ++ } ++ ++ if (params->has_cpu_throttle_initial) { ++ dest->cpu_throttle_initial = params->cpu_throttle_initial; ++ } ++ ++ if (params->has_cpu_throttle_increment) { ++ dest->cpu_throttle_increment = params->cpu_throttle_increment; ++ } ++ ++ if (params->has_cpu_throttle_tailslow) { ++ dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; ++ } ++ ++ if (params->tls_creds) { ++ assert(params->tls_creds->type == QTYPE_QSTRING); ++ dest->tls_creds = params->tls_creds->u.s; ++ } ++ 
++ if (params->tls_hostname) { ++ assert(params->tls_hostname->type == QTYPE_QSTRING); ++ dest->tls_hostname = params->tls_hostname->u.s; ++ } ++ ++ if (params->has_max_bandwidth) { ++ dest->max_bandwidth = params->max_bandwidth; ++ } ++ ++ if (params->has_downtime_limit) { ++ dest->downtime_limit = params->downtime_limit; ++ } ++ ++ if (params->has_x_checkpoint_delay) { ++ dest->x_checkpoint_delay = params->x_checkpoint_delay; ++ } ++ ++ if (params->has_block_incremental) { ++ dest->block_incremental = params->block_incremental; ++ } ++ if (params->has_multifd_channels) { ++ dest->multifd_channels = params->multifd_channels; ++ } ++ if (params->has_multifd_compression) { ++ dest->multifd_compression = params->multifd_compression; ++ } ++ if (params->has_xbzrle_cache_size) { ++ dest->xbzrle_cache_size = params->xbzrle_cache_size; ++ } ++ if (params->has_max_postcopy_bandwidth) { ++ dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; ++ } ++ if (params->has_max_cpu_throttle) { ++ dest->max_cpu_throttle = params->max_cpu_throttle; ++ } ++ if (params->has_announce_initial) { ++ dest->announce_initial = params->announce_initial; ++ } ++ if (params->has_announce_max) { ++ dest->announce_max = params->announce_max; ++ } ++ if (params->has_announce_rounds) { ++ dest->announce_rounds = params->announce_rounds; ++ } ++ if (params->has_announce_step) { ++ dest->announce_step = params->announce_step; ++ } ++ ++ if (params->has_block_bitmap_mapping) { ++ dest->has_block_bitmap_mapping = true; ++ dest->block_bitmap_mapping = params->block_bitmap_mapping; ++ } ++} ++ ++static void migrate_params_apply(MigrateSetParameters *params, Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ ++ if (params->has_compress_level) { ++ s->parameters.compress_level = params->compress_level; ++ } ++ ++ if (params->has_compress_threads) { ++ s->parameters.compress_threads = params->compress_threads; ++ } 
++ ++ if (params->has_compress_wait_thread) { ++ s->parameters.compress_wait_thread = params->compress_wait_thread; ++ } ++ ++ if (params->has_decompress_threads) { ++ s->parameters.decompress_threads = params->decompress_threads; ++ } ++ ++ if (params->has_throttle_trigger_threshold) { ++ s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; ++ } ++ ++ if (params->has_cpu_throttle_initial) { ++ s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; ++ } ++ ++ if (params->has_cpu_throttle_increment) { ++ s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; ++ } ++ ++ if (params->has_cpu_throttle_tailslow) { ++ s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; ++ } ++ ++ if (params->tls_creds) { ++ g_free(s->parameters.tls_creds); ++ assert(params->tls_creds->type == QTYPE_QSTRING); ++ s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); ++ } ++ ++ if (params->tls_hostname) { ++ g_free(s->parameters.tls_hostname); ++ assert(params->tls_hostname->type == QTYPE_QSTRING); ++ s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); ++ } ++ ++ if (params->tls_authz) { ++ g_free(s->parameters.tls_authz); ++ assert(params->tls_authz->type == QTYPE_QSTRING); ++ s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); ++ } ++ ++ if (params->has_max_bandwidth) { ++ s->parameters.max_bandwidth = params->max_bandwidth; ++ if (s->to_dst_file && !migration_in_postcopy()) { ++ qemu_file_set_rate_limit(s->to_dst_file, ++ s->parameters.max_bandwidth / XFER_LIMIT_RATIO); ++ } ++ } ++ ++ if (params->has_downtime_limit) { ++ s->parameters.downtime_limit = params->downtime_limit; ++ } ++ ++ if (params->has_x_checkpoint_delay) { ++ s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; ++ if (migration_in_colo_state()) { ++ colo_checkpoint_notify(s); ++ } ++ } ++ ++ if (params->has_block_incremental) { ++ s->parameters.block_incremental = params->block_incremental; ++ } ++ if 
(params->has_multifd_channels) { ++ s->parameters.multifd_channels = params->multifd_channels; ++ } ++ if (params->has_multifd_compression) { ++ s->parameters.multifd_compression = params->multifd_compression; ++ } ++ if (params->has_xbzrle_cache_size) { ++ s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; ++ xbzrle_cache_resize(params->xbzrle_cache_size, errp); ++ } ++ if (params->has_max_postcopy_bandwidth) { ++ s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; ++ if (s->to_dst_file && migration_in_postcopy()) { ++ qemu_file_set_rate_limit(s->to_dst_file, ++ s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); ++ } ++ } ++ if (params->has_max_cpu_throttle) { ++ s->parameters.max_cpu_throttle = params->max_cpu_throttle; ++ } ++ if (params->has_announce_initial) { ++ s->parameters.announce_initial = params->announce_initial; ++ } ++ if (params->has_announce_max) { ++ s->parameters.announce_max = params->announce_max; ++ } ++ if (params->has_announce_rounds) { ++ s->parameters.announce_rounds = params->announce_rounds; ++ } ++ if (params->has_announce_step) { ++ s->parameters.announce_step = params->announce_step; ++ } ++ ++ if (params->has_block_bitmap_mapping) { ++ qapi_free_BitmapMigrationNodeAliasList( ++ s->parameters.block_bitmap_mapping); ++ ++ s->parameters.has_block_bitmap_mapping = true; ++ s->parameters.block_bitmap_mapping = ++ QAPI_CLONE(BitmapMigrationNodeAliasList, ++ params->block_bitmap_mapping); ++ } ++} ++ ++void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) ++{ ++ MigrationParameters tmp; ++ ++ /* TODO Rewrite "" to null instead */ ++ if (params->tls_creds ++ && params->tls_creds->type == QTYPE_QNULL) { ++ qobject_unref(params->tls_creds->u.n); ++ params->tls_creds->type = QTYPE_QSTRING; ++ params->tls_creds->u.s = strdup(""); ++ } ++ /* TODO Rewrite "" to null instead */ ++ if (params->tls_hostname ++ && params->tls_hostname->type == QTYPE_QNULL) { ++ 
qobject_unref(params->tls_hostname->u.n); ++ params->tls_hostname->type = QTYPE_QSTRING; ++ params->tls_hostname->u.s = strdup(""); ++ } ++ ++ migrate_params_test_apply(params, &tmp); ++ ++ if (!migrate_params_check(&tmp, errp)) { ++ /* Invalid parameter */ ++ return; ++ } ++ ++ migrate_params_apply(params, errp); ++} +diff --git a/migration/options.h b/migration/options.h +index 13318a16c7..89067e59a0 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -14,6 +14,13 @@ + #ifndef QEMU_MIGRATION_OPTIONS_H + #define QEMU_MIGRATION_OPTIONS_H + ++/* constants */ ++ ++/* Amount of time to allocate to each "chunk" of bandwidth-throttled ++ * data. */ ++#define BUFFER_DELAY 100 ++#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) ++ + /* capabilities */ + + bool migrate_auto_converge(void); +@@ -74,4 +81,8 @@ int migrate_multifd_zstd_level(void); + uint8_t migrate_throttle_trigger_threshold(void); + uint64_t migrate_xbzrle_cache_size(void); + ++/* parameters helpers */ ++ ++bool migrate_params_check(MigrationParameters *params, Error **errp); ++ + #endif +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch b/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch new file mode 100644 index 0000000..d2564de --- /dev/null +++ b/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch @@ -0,0 +1,100 @@ +From 00cc3c3598828588619a7b3696819060bddaddb8 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:15:59 +0100 +Subject: [PATCH 34/56] migration: Move qmp_query_migrate_capabilities() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [33/50] dbfa8f1e7aa7e000b4622ce2da12d7d418710f19 
(peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 4d0c6b695bf5252402ebf967f83baebfd2f4b91e) +Signed-off-by: Peter Xu +--- + migration/migration.c | 22 ---------------------- + migration/options.c | 23 +++++++++++++++++++++++ + 2 files changed, 23 insertions(+), 22 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d9e30ca918..3dc8ee4875 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -886,28 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) + migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); + } + +-MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) +-{ +- MigrationCapabilityStatusList *head = NULL, **tail = &head; +- MigrationCapabilityStatus *caps; +- MigrationState *s = migrate_get_current(); +- int i; +- +- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +-#ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (i == MIGRATION_CAPABILITY_BLOCK) { +- continue; +- } +-#endif +- caps = g_malloc0(sizeof(*caps)); +- caps->capability = i; +- caps->state = s->capabilities[i]; +- QAPI_LIST_APPEND(tail, caps); +- } +- +- return head; +-} +- + MigrationParameters *qmp_query_migrate_parameters(Error **errp) + { + MigrationParameters *params; +diff --git a/migration/options.c b/migration/options.c +index 367c930f46..ff621bdeb3 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -13,6 +13,7 @@ + + #include "qemu/osdep.h" + #include "qapi/error.h" ++#include "qapi/qapi-commands-migration.h" + #include "sysemu/runstate.h" + #include "migration.h" + #include "ram.h" +@@ -390,3 +391,25 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + + return true; + } ++ ++MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) ++{ ++ MigrationCapabilityStatusList *head = NULL, **tail = &head; ++ MigrationCapabilityStatus *caps; ++ 
MigrationState *s = migrate_get_current(); ++ int i; ++ ++ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { ++#ifndef CONFIG_LIVE_BLOCK_MIGRATION ++ if (i == MIGRATION_CAPABILITY_BLOCK) { ++ continue; ++ } ++#endif ++ caps = g_malloc0(sizeof(*caps)); ++ caps->capability = i; ++ caps->state = s->capabilities[i]; ++ QAPI_LIST_APPEND(tail, caps); ++ } ++ ++ return head; ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch b/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch new file mode 100644 index 0000000..7339ce0 --- /dev/null +++ b/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch @@ -0,0 +1,226 @@ +From 4782b59a8b0b5762f87505ac7a83b37ddd2e0b3f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 20:28:56 +0100 +Subject: [PATCH 19/56] migration: Pass migrate_caps_check() the old and new + caps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [18/50] df78d680d03f15d7cb7401ad89e68a4fc93fa835 (peterx/qemu-kvm) + +We used to pass the old capabilities array and the new +capabilities as a list. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b02c7fc9ef447787414e6fa67eff75e7b7b30180) +Signed-off-by: Peter Xu +--- + migration/migration.c | 80 +++++++++++++++++-------------------------- + 1 file changed, 31 insertions(+), 49 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d8e5fb6226..e8f596bcfa 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1299,30 +1299,20 @@ WriteTrackingSupport migrate_query_write_tracking(void) + } + + /** +- * @migration_caps_check - check capability validity ++ * @migration_caps_check - check capability compatibility + * +- * @cap_list: old capability list, array of bool +- * @params: new capabilities to be applied soon ++ * @old_caps: old capability list ++ * @new_caps: new capability list + * @errp: set *errp if the check failed, with reason + * + * Returns true if check passed, otherwise false. + */ +-static bool migrate_caps_check(bool *cap_list, +- MigrationCapabilityStatusList *params, +- Error **errp) ++static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + { +- MigrationCapabilityStatusList *cap; +- bool old_postcopy_cap; + MigrationIncomingState *mis = migration_incoming_get_current(); + +- old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; +- +- for (cap = params; cap; cap = cap->next) { +- cap_list[cap->value->capability] = cap->value->state; +- } +- + #ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { ++ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { + error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " + "block migration"); + error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); +@@ -1331,7 +1321,7 @@ static bool migrate_caps_check(bool *cap_list, + #endif + + #ifndef CONFIG_REPLICATION +- if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { ++ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { + error_setg(errp, "QEMU compiled without 
replication module" + " can't enable COLO"); + error_append_hint(errp, "Please enable replication before COLO.\n"); +@@ -1339,12 +1329,13 @@ static bool migrate_caps_check(bool *cap_list, + } + #endif + +- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { + /* This check is reasonably expensive, so only when it's being + * set the first time, also it's only the destination that needs + * special support. + */ +- if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && ++ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && ++ runstate_check(RUN_STATE_INMIGRATE) && + !postcopy_ram_supported_by_host(mis)) { + /* postcopy_ram_supported_by_host will have emitted a more + * detailed message +@@ -1353,13 +1344,13 @@ static bool migrate_caps_check(bool *cap_list, + return false; + } + +- if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { ++ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { + error_setg(errp, "Postcopy is not compatible with ignore-shared"); + return false; + } + } + +- if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { ++ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { + WriteTrackingSupport wt_support; + int idx; + /* +@@ -1383,7 +1374,7 @@ static bool migrate_caps_check(bool *cap_list, + */ + for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { + int incomp_cap = check_caps_background_snapshot.caps[idx]; +- if (cap_list[incomp_cap]) { ++ if (new_caps[incomp_cap]) { + error_setg(errp, + "Background-snapshot is not compatible with %s", + MigrationCapability_str(incomp_cap)); +@@ -1393,10 +1384,10 @@ static bool migrate_caps_check(bool *cap_list, + } + + #ifdef CONFIG_LINUX +- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && +- (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || +- cap_list[MIGRATION_CAPABILITY_COMPRESS] || +- cap_list[MIGRATION_CAPABILITY_XBZRLE] || ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ 
(!new_caps[MIGRATION_CAPABILITY_MULTIFD] || ++ new_caps[MIGRATION_CAPABILITY_COMPRESS] || ++ new_caps[MIGRATION_CAPABILITY_XBZRLE] || + migrate_multifd_compression() || + migrate_use_tls())) { + error_setg(errp, +@@ -1404,15 +1395,15 @@ static bool migrate_caps_check(bool *cap_list, + return false; + } + #else +- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { + error_setg(errp, + "Zero copy currently only available on Linux"); + return false; + } + #endif + +- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { +- if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { ++ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { + error_setg(errp, "Postcopy preempt requires postcopy-ram"); + return false; + } +@@ -1423,14 +1414,14 @@ static bool migrate_caps_check(bool *cap_list, + * different compression channels, which is not compatible with the + * preempt assumptions on channel assignments. 
+ */ +- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, "Postcopy preempt not compatible with compress"); + return false; + } + } + +- if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) { +- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { ++ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, "Multifd is not compatible with compress"); + return false; + } +@@ -1486,15 +1477,19 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + { + MigrationState *s = migrate_get_current(); + MigrationCapabilityStatusList *cap; +- bool cap_list[MIGRATION_CAPABILITY__MAX]; ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; + + if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return; + } + +- memcpy(cap_list, s->capabilities, sizeof(cap_list)); +- if (!migrate_caps_check(cap_list, params, errp)) { ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ for (cap = params; cap; cap = cap->next) { ++ new_caps[cap->value->capability] = cap->value->state; ++ } ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { + return; + } + +@@ -4634,27 +4629,14 @@ static void migration_instance_init(Object *obj) + */ + static bool migration_object_check(MigrationState *ms, Error **errp) + { +- MigrationCapabilityStatusList *head = NULL; + /* Assuming all off */ +- bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; +- int i; ++ bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 }; + + if (!migrate_params_check(&ms->parameters, errp)) { + return false; + } + +- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (ms->capabilities[i]) { +- QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); +- } +- } +- +- ret = migrate_caps_check(cap_list, head, errp); +- +- /* It works with head == NULL */ +- qapi_free_MigrationCapabilityStatusList(head); +- +- return ret; ++ return migrate_caps_check(old_caps, 
ms->capabilities, errp); + } + + static const TypeInfo migration_type = { +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch b/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch new file mode 100644 index 0000000..22acab5 --- /dev/null +++ b/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch @@ -0,0 +1,109 @@ +From 3cecf66655a0dd599666bcac8add2dee85d5651f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 19 Apr 2023 18:16:05 +0200 +Subject: [PATCH 16/56] migration: Rename duplicate to zero_pages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [15/50] 89db3c8b167c0f411ba95ce2730540c0e8f1206b (peterx/qemu-kvm) + +Rest of counters that refer to pages has a _pages suffix. +And historically, this showed the number of pages composed of the same +character, here comes the name "duplicated". But since years ago, it +refers to the number of zero_pages. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 1a386e8de5995fb5478ea99baa6d3e71abcf4b80) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 10 +++++----- + migration/ram.h | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 39501a0ed8..c15e2a61ca 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1142,7 +1142,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = stat64_get(&ram_counters.transferred); + info->ram->total = ram_bytes_total(); +- info->ram->duplicate = stat64_get(&ram_counters.duplicate); ++ info->ram->duplicate = stat64_get(&ram_counters.zero_pages); + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; + info->ram->normal = stat64_get(&ram_counters.normal); +diff --git a/migration/ram.c b/migration/ram.c +index fe69ecaef4..19d345a030 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1119,7 +1119,7 @@ uint64_t ram_pagesize_summary(void) + uint64_t ram_get_total_transferred_pages(void) + { + return stat64_get(&ram_counters.normal) + +- stat64_get(&ram_counters.duplicate) + ++ stat64_get(&ram_counters.zero_pages) + + compression_counters.pages + xbzrle_counters.pages; + } + +@@ -1320,7 +1320,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, + int len = save_zero_page_to_file(pss, f, block, offset); + + if (len) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + ram_transferred_add(len); + return 1; + } +@@ -1359,7 +1359,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + if (bytes_xmit > 0) { + stat64_add(&ram_counters.normal, 1); + } else if (bytes_xmit == 0) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + } + + return true; +@@ -1486,7 
+1486,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + ram_transferred_add(bytes_xmit); + + if (param->zero_page) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + return; + } + +@@ -2621,7 +2621,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + uint64_t pages = size / TARGET_PAGE_SIZE; + + if (zero) { +- stat64_add(&ram_counters.duplicate, pages); ++ stat64_add(&ram_counters.zero_pages, pages); + } else { + stat64_add(&ram_counters.normal, pages); + ram_transferred_add(size); +diff --git a/migration/ram.h b/migration/ram.h +index afa68521d7..55258334fe 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -45,7 +45,7 @@ typedef struct { + Stat64 dirty_sync_count; + Stat64 dirty_sync_missed_zero_copy; + Stat64 downtime_bytes; +- Stat64 duplicate; ++ Stat64 zero_pages; + Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch b/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch new file mode 100644 index 0000000..8ad6447 --- /dev/null +++ b/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch @@ -0,0 +1,109 @@ +From 7e27e7ea83856e1a7222ff46d91495f48fb6be4d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 19 Apr 2023 18:19:45 +0200 +Subject: [PATCH 17/56] migration: Rename normal to normal_pages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [16/50] 7df8b946918def9657bbe357861a6d72b5399ac6 (peterx/qemu-kvm) + +Rest of counters that refer to pages has a _pages suffix. +And historically, this showed the number of full pages transferred. 
+The name "normal" refered to the fact that they were sent without any +optimization (compression, xbzrle, zero_page, ...). + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 8c0cda8fa0de0a50148e2c60552afca9cffca643) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 10 +++++----- + migration/ram.h | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c15e2a61ca..f1b3439e5f 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1145,7 +1145,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->duplicate = stat64_get(&ram_counters.zero_pages); + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; +- info->ram->normal = stat64_get(&ram_counters.normal); ++ info->ram->normal = stat64_get(&ram_counters.normal_pages); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = +diff --git a/migration/ram.c b/migration/ram.c +index 19d345a030..229714045a 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1118,7 +1118,7 @@ uint64_t ram_pagesize_summary(void) + + uint64_t ram_get_total_transferred_pages(void) + { +- return stat64_get(&ram_counters.normal) + ++ return stat64_get(&ram_counters.normal_pages) + + stat64_get(&ram_counters.zero_pages) + + compression_counters.pages + xbzrle_counters.pages; + } +@@ -1357,7 +1357,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + } + + if (bytes_xmit > 0) { +- stat64_add(&ram_counters.normal, 1); ++ stat64_add(&ram_counters.normal_pages, 1); + } else if (bytes_xmit == 0) { + stat64_add(&ram_counters.zero_pages, 1); + } +@@ -1391,7 +1391,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + } + ram_transferred_add(TARGET_PAGE_SIZE); +- stat64_add(&ram_counters.normal, 1); ++ 
stat64_add(&ram_counters.normal_pages, 1); + return 1; + } + +@@ -1447,7 +1447,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, + if (multifd_queue_page(file, block, offset) < 0) { + return -1; + } +- stat64_add(&ram_counters.normal, 1); ++ stat64_add(&ram_counters.normal_pages, 1); + + return 1; + } +@@ -2623,7 +2623,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + if (zero) { + stat64_add(&ram_counters.zero_pages, pages); + } else { +- stat64_add(&ram_counters.normal, pages); ++ stat64_add(&ram_counters.normal_pages, pages); + ram_transferred_add(size); + qemu_file_credit_transfer(f, size); + } +diff --git a/migration/ram.h b/migration/ram.h +index 55258334fe..a6e0d70226 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -47,7 +47,7 @@ typedef struct { + Stat64 downtime_bytes; + Stat64 zero_pages; + Stat64 multifd_bytes; +- Stat64 normal; ++ Stat64 normal_pages; + Stat64 postcopy_bytes; + Stat64 postcopy_requests; + Stat64 precopy_bytes; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch b/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch new file mode 100644 index 0000000..7e78d82 --- /dev/null +++ b/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch @@ -0,0 +1,52 @@ +From c0d377e1bf442a09b82fddbb8588fcddf6439854 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 24 Nov 2022 17:26:19 +0100 +Subject: [PATCH 09/56] migration: Update atomic stats out of the mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [8/50] 88e9dbc9a3e5aef60a7c98c871144904c7062b1f (peterx/qemu-kvm) + +Reviewed-by: David Edmondson +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry 
picked from commit 30fb22cda45bea43a3c0e26049ebdd71a9503ffd) +Signed-off-by: Peter Xu +--- + migration/multifd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 01fab01a92..6ef3a27938 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -433,8 +433,8 @@ static int multifd_send_pages(QEMUFile *f) + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); + ram_counters.multifd_bytes += transferred; +- stat64_add(&ram_counters.transferred, transferred); + qemu_mutex_unlock(&p->mutex); ++ stat64_add(&ram_counters.transferred, transferred); + qemu_sem_post(&p->sem); + + return 1; +@@ -628,8 +628,8 @@ int multifd_send_sync_main(QEMUFile *f) + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); + ram_counters.multifd_bytes += p->packet_len; +- stat64_add(&ram_counters.transferred, p->packet_len); + qemu_mutex_unlock(&p->mutex); ++ stat64_add(&ram_counters.transferred, p->packet_len); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch b/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch new file mode 100644 index 0000000..f179761 --- /dev/null +++ b/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch @@ -0,0 +1,40 @@ +From 8d203baa6cbd1f371e308c2c9d59a5ca7d29dca8 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:55:30 +0100 +Subject: [PATCH 38/56] migration: Use migrate_max_postcopy_bandwidth() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [37/50] 
d62948e9ee40a85ed9b460a583c3b0e43cd5d47f (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5390adec03a7d8bc6bcf5887f726b0ddaeb90681) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 7f2e770deb..78bca9a93f 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3799,7 +3799,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + + if (resume) { + /* This is a resumed migration */ +- rate_limit = s->parameters.max_postcopy_bandwidth / ++ rate_limit = migrate_max_postcopy_bandwidth() / + XFER_LIMIT_RATIO; + } else { + /* This is a fresh new migration */ +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch b/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch deleted file mode 100644 index 387d0b3..0000000 --- a/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001 -From: "manish.mishra" -Date: Tue, 20 Dec 2022 18:44:18 +0000 -Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of - channels -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders -RH-Bugzilla: 2169732 -RH-Acked-by: quintela1 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Dr. David Alan Gilbert -RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm) - -Current logic assumes that channel connections on the destination side are -always established in the same order as the source and the first one will -always be the main channel followed by the multifid or post-copy -preemption channel. 
This may not be always true, as even if a channel has a -connection established on the source side it can be in the pending state on -the destination side and a newer connection can be established first. -Basically causing out of order mapping of channels on the destination side. -Currently, all channels except post-copy preempt send a magic number, this -patch uses that magic number to decide the type of channel. This logic is -applicable only for precopy(multifd) live migration, as mentioned, the -post-copy preempt channel does not send any magic number. Also, tls live -migrations already does tls handshake before creating other channels, so -this issue is not possible with tls, hence this logic is avoided for tls -live migrations. This patch uses read peek to check the magic number of -channels so that current data/control stream management remains -un-effected. - -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrange -Reviewed-by: Juan Quintela -Suggested-by: Daniel P. Berrange -Signed-off-by: manish.mishra -Signed-off-by: Juan Quintela -(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2) -Signed-off-by: Peter Xu ---- - migration/channel.c | 45 +++++++++++++++++++++++++++++++++ - migration/channel.h | 5 ++++ - migration/migration.c | 54 ++++++++++++++++++++++++++++------------ - migration/multifd.c | 19 +++++++------- - migration/multifd.h | 2 +- - migration/postcopy-ram.c | 5 +--- - migration/postcopy-ram.h | 2 +- - 7 files changed, 101 insertions(+), 31 deletions(-) - -diff --git a/migration/channel.c b/migration/channel.c -index 1b0815039f..ca3319a309 100644 ---- a/migration/channel.c -+++ b/migration/channel.c -@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s, - migrate_fd_connect(s, error); - error_free(error); - } -+ -+ -+/** -+ * @migration_channel_read_peek - Peek at migration channel, without -+ * actually removing it from channel buffer. 
-+ * -+ * @ioc: the channel object -+ * @buf: the memory region to read data into -+ * @buflen: the number of bytes to read in @buf -+ * @errp: pointer to a NULL-initialized error object -+ * -+ * Returns 0 if successful, returns -1 and sets @errp if fails. -+ */ -+int migration_channel_read_peek(QIOChannel *ioc, -+ const char *buf, -+ const size_t buflen, -+ Error **errp) -+{ -+ ssize_t len = 0; -+ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; -+ -+ while (true) { -+ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, -+ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); -+ -+ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { -+ error_setg(errp, -+ "Failed to peek at channel"); -+ return -1; -+ } -+ -+ if (len == buflen) { -+ break; -+ } -+ -+ /* 1ms sleep. */ -+ if (qemu_in_coroutine()) { -+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); -+ } else { -+ g_usleep(1000); -+ } -+ } -+ -+ return 0; -+} -diff --git a/migration/channel.h b/migration/channel.h -index 67a461c28a..5bdb8208a7 100644 ---- a/migration/channel.h -+++ b/migration/channel.h -@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s, - QIOChannel *ioc, - const char *hostname, - Error *error_in); -+ -+int migration_channel_read_peek(QIOChannel *ioc, -+ const char *buf, -+ const size_t buflen, -+ Error **errp); - #endif -diff --git a/migration/migration.c b/migration/migration.c -index f485eea5fb..593dbd25de 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -31,6 +31,7 @@ - #include "migration.h" - #include "savevm.h" - #include "qemu-file.h" -+#include "channel.h" - #include "migration/vmstate.h" - #include "block/block.h" - #include "qapi/error.h" -@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -- if (multifd_load_setup(errp) != 0) { -- return false; -- } -- - if (!mis->from_src_file) { - mis->from_src_file = f; - } -@@ -733,31 +730,56 @@ void 
migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - Error *local_err = NULL; -- bool start_migration; - QEMUFile *f; -+ bool default_channel = true; -+ uint32_t channel_magic = 0; -+ int ret = 0; - -- if (!mis->from_src_file) { -- /* The first connection (multifd may have multiple) */ -+ if (migrate_use_multifd() && !migrate_postcopy_ram() && -+ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { -+ /* -+ * With multiple channels, it is possible that we receive channels -+ * out of order on destination side, causing incorrect mapping of -+ * source channels on destination side. Check channel MAGIC to -+ * decide type of channel. Please note this is best effort, postcopy -+ * preempt channel does not send any magic number so avoid it for -+ * postcopy live migration. Also tls live migration already does -+ * tls handshake while initializing main channel so with tls this -+ * issue is not possible. -+ */ -+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, -+ sizeof(channel_magic), &local_err); -+ -+ if (ret != 0) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ -+ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); -+ } else { -+ default_channel = !mis->from_src_file; -+ } -+ -+ if (multifd_load_setup(errp) != 0) { -+ error_setg(errp, "Failed to setup multifd channels"); -+ return; -+ } -+ -+ if (default_channel) { - f = qemu_file_new_input(ioc); - - if (!migration_incoming_setup(f, errp)) { - return; - } -- -- /* -- * Common migration only needs one channel, so we can start -- * right now. Some features need more than one channel, we wait. 
-- */ -- start_migration = !migration_needs_multiple_sockets(); - } else { - /* Multiple connections */ - assert(migration_needs_multiple_sockets()); - if (migrate_use_multifd()) { -- start_migration = multifd_recv_new_channel(ioc, &local_err); -+ multifd_recv_new_channel(ioc, &local_err); - } else { - assert(migrate_postcopy_preempt()); - f = qemu_file_new_input(ioc); -- start_migration = postcopy_preempt_new_channel(mis, f); -+ postcopy_preempt_new_channel(mis, f); - } - if (local_err) { - error_propagate(errp, local_err); -@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - } - } - -- if (start_migration) { -+ if (migration_has_all_channels()) { - /* If it's a recovery, we're done */ - if (postcopy_try_recover()) { - return; -diff --git a/migration/multifd.c b/migration/multifd.c -index 509bbbe3bf..c3385529cf 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp) - uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); - uint8_t i; - -- if (!migrate_use_multifd()) { -+ /* -+ * Return successfully if multiFD recv state is already initialised -+ * or multiFD is not enabled. -+ */ -+ if (multifd_recv_state || !migrate_use_multifd()) { - return 0; - } -+ - if (!migrate_multi_channels_is_allowed()) { - error_setg(errp, "multifd is not supported by current protocol"); - return -1; -@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void) - - /* - * Try to receive all multifd channels to get ready for the migration. -- * - Return true and do not set @errp when correctly receiving all channels; -- * - Return false and do not set @errp when correctly receiving the current one; -- * - Return false and set @errp when failing to receive the current channel. -+ * Sets @errp when failing to receive the current channel. 
- */ --bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) -+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - { - MultiFDRecvParams *p; - Error *local_err = NULL; -@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - "failed to receive packet" - " via multifd channel %d: ", - qatomic_read(&multifd_recv_state->count)); -- return false; -+ return; - } - trace_multifd_recv_new_channel(id); - -@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - id); - multifd_recv_terminate_threads(local_err); - error_propagate(errp, local_err); -- return false; -+ return; - } - p->c = ioc; - object_ref(OBJECT(ioc)); -@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, - QEMU_THREAD_JOINABLE); - qatomic_inc(&multifd_recv_state->count); -- return qatomic_read(&multifd_recv_state->count) == -- migrate_multifd_channels(); - } -diff --git a/migration/multifd.h b/migration/multifd.h -index 519f498643..913e4ba274 100644 ---- a/migration/multifd.h -+++ b/migration/multifd.h -@@ -18,7 +18,7 @@ void multifd_save_cleanup(void); - int multifd_load_setup(Error **errp); - int multifd_load_cleanup(Error **errp); - bool multifd_recv_all_channels_created(void); --bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); -+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); - void multifd_recv_sync_main(void); - int multifd_send_sync_main(QEMUFile *f); - int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 0c55df0e52..b98e95dab0 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) - } - } - --bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) -+void 
postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) - { - /* - * The new loading channel has its own threads, so it needs to be -@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) - qemu_file_set_blocking(file, true); - mis->postcopy_qemufile_dst = file; - trace_postcopy_preempt_new_channel(); -- -- /* Start the migration immediately */ -- return true; - } - - /* -diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h -index 6147bf7d1d..25881c4127 100644 ---- a/migration/postcopy-ram.h -+++ b/migration/postcopy-ram.h -@@ -190,7 +190,7 @@ enum PostcopyChannels { - RAM_CHANNEL_MAX, - }; - --bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); -+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); - int postcopy_preempt_setup(MigrationState *s, Error **errp); - int postcopy_preempt_wait_channel(MigrationState *s); - --- -2.31.1 - diff --git a/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch b/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch new file mode 100644 index 0000000..9451696 --- /dev/null +++ b/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch @@ -0,0 +1,153 @@ +From cfdf5715a2334ad06b5966ec986d134bbd5ba08b Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Dec 2022 12:48:16 +0100 +Subject: [PATCH 05/56] migration: mark mixed functions that can suspend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [4/50] 9f055b526edd06a3440999d5de91e5d624678c7d (peterx/qemu-kvm) + +There should be no paths from a coroutine_fn to aio_poll, however in +practice coroutine_mixed_fn will call aio_poll in the 
!qemu_in_coroutine() +path. By marking mixed functions, we can track accurately the call paths +that execute entirely in coroutine context, and find more missing +coroutine_fn markers. This results in more accurate checks that +coroutine code does not end up blocking. + +If the marking were extended transitively to all functions that call +these ones, static analysis could be done much more efficiently. +However, this is a start and makes it possible to use vrc's path-based +searches to find potential bugs where coroutine_fns call blocking functions. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 394b9407e4c515f96df6647d629ee28cbb86f07c) +Signed-off-by: Peter Xu +--- + include/migration/qemu-file-types.h | 4 ++-- + migration/qemu-file.c | 14 +++++++------- + migration/qemu-file.h | 6 +++--- + 3 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h +index 2867e3da84..1436f9ce92 100644 +--- a/include/migration/qemu-file-types.h ++++ b/include/migration/qemu-file-types.h +@@ -35,7 +35,7 @@ void qemu_put_byte(QEMUFile *f, int v); + void qemu_put_be16(QEMUFile *f, unsigned int v); + void qemu_put_be32(QEMUFile *f, unsigned int v); + void qemu_put_be64(QEMUFile *f, uint64_t v); +-size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); ++size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); + + int qemu_get_byte(QEMUFile *f); + +@@ -161,7 +161,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) + qemu_get_be64s(f, (uint64_t *)pv); + } + +-size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); ++size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]); + + void qemu_put_counted_string(QEMUFile *f, const char *name); + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 102ab3b439..ee04240a21 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -392,7 +392,7 @@ size_t 
ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, + * case if the underlying file descriptor gives a short read, and that can + * happen even on a blocking fd. + */ +-static ssize_t qemu_fill_buffer(QEMUFile *f) ++static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) + { + int len; + int pending; +@@ -585,7 +585,7 @@ void qemu_file_skip(QEMUFile *f, int size) + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ +-size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) ++size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) + { + ssize_t pending; + size_t index; +@@ -633,7 +633,7 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ +-size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) ++size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) + { + size_t pending = size; + size_t done = 0; +@@ -674,7 +674,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) + * Note: Since **buf may get changed, the caller should take care to + * keep a pointer to the original buffer if it needs to deallocate it. + */ +-size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) ++size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) + { + if (size < IO_BUF_SIZE) { + size_t res; +@@ -696,7 +696,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) + * Peeks a single byte from the buffer; this isn't guaranteed to work if + * offset leaves a gap after the previous read/peeked data. 
+ */ +-int qemu_peek_byte(QEMUFile *f, int offset) ++int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset) + { + int index = f->buf_index + offset; + +@@ -713,7 +713,7 @@ int qemu_peek_byte(QEMUFile *f, int offset) + return f->buf[index]; + } + +-int qemu_get_byte(QEMUFile *f) ++int coroutine_mixed_fn qemu_get_byte(QEMUFile *f) + { + int result; + +@@ -894,7 +894,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) + * else 0 + * (Note a 0 length string will return 0 either way) + */ +-size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) ++size_t coroutine_fn qemu_get_counted_string(QEMUFile *f, char buf[256]) + { + size_t len = qemu_get_byte(f); + size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); +diff --git a/migration/qemu-file.h b/migration/qemu-file.h +index 9d0155a2a1..d16cd50448 100644 +--- a/migration/qemu-file.h ++++ b/migration/qemu-file.h +@@ -108,8 +108,8 @@ bool qemu_file_is_writable(QEMUFile *f); + + #include "migration/qemu-file-types.h" + +-size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); +-size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); ++size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); ++size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); + ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, + const uint8_t *p, size_t size); + int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); +@@ -119,7 +119,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); + * is; you aren't guaranteed to be able to peak to +n bytes unless you've + * previously peeked +n-1. 
+ */ +-int qemu_peek_byte(QEMUFile *f, int offset); ++int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); + void qemu_file_skip(QEMUFile *f, int size); + /* + * qemu_file_credit_transfer: +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch b/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch new file mode 100644 index 0000000..4e73c80 --- /dev/null +++ b/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch @@ -0,0 +1,121 @@ +From 96e6914cbfb18bb8287c57b9ac9a6b364d3e7a22 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 22 Feb 2023 17:18:05 +0100 +Subject: [PATCH 20/56] migration: move migration_global_dump() to + migration-hmp-cmds.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [19/50] c8d330a2833c706b9bd78f7154be882e3977ad06 (peterx/qemu-kvm) + +It is only used there, so we can make it static. +Once there, remove spice.h that it is not used. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +Reviewed-by: Philippe Mathieu-Daudé + +--- + +fix David Edmonson ui/qemu-spice.h unintended removal + +(cherry picked from commit c938157713e723165a42cb6e8364adb6fcbd0e22) +Signed-off-by: Peter Xu +--- + include/migration/misc.h | 1 - + migration/migration-hmp-cmds.c | 22 +++++++++++++++++++++- + migration/migration.c | 19 ------------------- + 3 files changed, 21 insertions(+), 21 deletions(-) + +diff --git a/include/migration/misc.h b/include/migration/misc.h +index 8b49841016..5ebe13b4b9 100644 +--- a/include/migration/misc.h ++++ b/include/migration/misc.h +@@ -66,7 +66,6 @@ bool migration_has_finished(MigrationState *); + bool migration_has_failed(MigrationState *); + /* ...and after the device transmission */ + bool migration_in_postcopy_after_devices(MigrationState *); +-void migration_global_dump(Monitor *mon); + /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */ + bool migration_in_incoming_postcopy(void); + /* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */ +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 72519ea99f..71da91967a 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -15,7 +15,6 @@ + + #include "qemu/osdep.h" + #include "block/qapi.h" +-#include "migration/misc.h" + #include "migration/snapshot.h" + #include "monitor/hmp.h" + #include "monitor/monitor.h" +@@ -30,6 +29,27 @@ + #include "qemu/sockets.h" + #include "sysemu/runstate.h" + #include "ui/qemu-spice.h" ++#include "sysemu/sysemu.h" ++#include "migration.h" ++ ++static void migration_global_dump(Monitor *mon) ++{ ++ MigrationState *ms = migrate_get_current(); ++ ++ monitor_printf(mon, "globals:\n"); ++ monitor_printf(mon, "store-global-state: %s\n", ++ ms->store_global_state ? "on" : "off"); ++ monitor_printf(mon, "only-migratable: %s\n", ++ only_migratable ? 
"on" : "off"); ++ monitor_printf(mon, "send-configuration: %s\n", ++ ms->send_configuration ? "on" : "off"); ++ monitor_printf(mon, "send-section-footer: %s\n", ++ ms->send_section_footer ? "on" : "off"); ++ monitor_printf(mon, "decompress-error-check: %s\n", ++ ms->decompress_error_check ? "on" : "off"); ++ monitor_printf(mon, "clear-bitmap-shift: %u\n", ++ ms->clear_bitmap_shift); ++} + + void hmp_info_migrate(Monitor *mon, const QDict *qdict) + { +diff --git a/migration/migration.c b/migration/migration.c +index e8f596bcfa..aa96ffdc5b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -4420,25 +4420,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + s->migration_thread_running = true; + } + +-void migration_global_dump(Monitor *mon) +-{ +- MigrationState *ms = migrate_get_current(); +- +- monitor_printf(mon, "globals:\n"); +- monitor_printf(mon, "store-global-state: %s\n", +- ms->store_global_state ? "on" : "off"); +- monitor_printf(mon, "only-migratable: %s\n", +- only_migratable ? "on" : "off"); +- monitor_printf(mon, "send-configuration: %s\n", +- ms->send_configuration ? "on" : "off"); +- monitor_printf(mon, "send-section-footer: %s\n", +- ms->send_section_footer ? "on" : "off"); +- monitor_printf(mon, "decompress-error-check: %s\n", +- ms->decompress_error_check ? 
"on" : "off"); +- monitor_printf(mon, "clear-bitmap-shift: %u\n", +- ms->clear_bitmap_shift); +-} +- + #define DEFINE_PROP_MIG_CAP(name, x) \ + DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch b/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch new file mode 100644 index 0000000..7700466 --- /dev/null +++ b/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch @@ -0,0 +1,117 @@ +From 4827d5be5357ab89e0c46f606ad828bf97d36471 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:38 -0400 +Subject: [PATCH 04/56] migration/postcopy: Detect file system on dest host +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [3/50] 121aeeda8a019f79dba6c077c7018bd1c86f3d71 (peterx/qemu-kvm) + +Postcopy requires the memory support userfaultfd to work. Right now we +check it but it's a bit too late (when switching to postcopy migration). + +Do that early right at enabling of postcopy. + +Note that this is still only a best effort because ramblocks can be +dynamically created. We can add check in hostmem creations and fail if +postcopy enabled, but maybe that's too aggressive. + +Still, we have chance to fail the most obvious where we know there's an +existing unsupported ramblock. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit ae30b9b2892b85e6c3d5c0b8d1949c4d77a2954a) +Signed-off-by: Peter Xu +--- + migration/postcopy-ram.c | 34 ++++++++++++++++++++++++++++++---- + 1 file changed, 30 insertions(+), 4 deletions(-) + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 93f39f8e06..bbb8af61ae 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -36,6 +36,7 @@ + #include "yank_functions.h" + #include "tls.h" + #include "qemu/userfaultfd.h" ++#include "qemu/mmap-alloc.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +@@ -336,11 +337,12 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + + /* Callback from postcopy_ram_supported_by_host block iterator. + */ +-static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) ++static int test_ramblock_postcopiable(RAMBlock *rb) + { + const char *block_name = qemu_ram_get_idstr(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); + size_t pagesize = qemu_ram_pagesize(rb); ++ QemuFsType fs; + + if (length % pagesize) { + error_report("Postcopy requires RAM blocks to be a page size multiple," +@@ -348,6 +350,15 @@ static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) + "page size of 0x%zx", block_name, length, pagesize); + return 1; + } ++ ++ if (rb->fd >= 0) { ++ fs = qemu_fd_getfs(rb->fd); ++ if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { ++ error_report("Host backend files need to be TMPFS or HUGETLBFS only"); ++ return 1; ++ } ++ } ++ + return 0; + } + +@@ -366,6 +377,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + struct uffdio_range range_struct; + uint64_t feature_mask; + Error *local_err = NULL; ++ RAMBlock *block; + + if (qemu_target_page_size() > pagesize) { + error_report("Target page size bigger than host page size"); +@@ -390,9 +402,23 @@ bool 
postcopy_ram_supported_by_host(MigrationIncomingState *mis) + goto out; + } + +- /* We don't support postcopy with shared RAM yet */ +- if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) { +- goto out; ++ /* ++ * We don't support postcopy with some type of ramblocks. ++ * ++ * NOTE: we explicitly ignored ramblock_is_ignored() instead we checked ++ * all possible ramblocks. This is because this function can be called ++ * when creating the migration object, during the phase RAM_MIGRATABLE ++ * is not even properly set for all the ramblocks. ++ * ++ * A side effect of this is we'll also check against RAM_SHARED ++ * ramblocks even if migrate_ignore_shared() is set (in which case ++ * we'll never migrate RAM_SHARED at all), but normally this shouldn't ++ * affect in reality, or we can revisit. ++ */ ++ RAMBLOCK_FOREACH(block) { ++ if (test_ramblock_postcopiable(block)) { ++ goto out; ++ } + } + + /* +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch b/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch new file mode 100644 index 0000000..88eb791 --- /dev/null +++ b/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch @@ -0,0 +1,44 @@ +From 93c9a1ae812720d3a29980a3c5fcfc1e916993de Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E7=9A=86=E4=BF=8A?= +Date: Fri, 17 Mar 2023 09:57:13 +0000 +Subject: [PATCH 07/56] migration: remove extra whitespace character for code + style +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [6/50] bc1cd812f8dfc18e47e1644b5333c703eae23d2d (peterx/qemu-kvm) + +Fix code style. 
+ +Signed-off-by: 李皆俊 +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 8ebb6ecc3798e66a9ba98355983762bedfa1b72d) +Signed-off-by: Peter Xu +--- + migration/ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 79d881f735..0e68099bf9 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3293,7 +3293,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + + migration_ops = g_malloc0(sizeof(MigrationOps)); + migration_ops->ram_save_target_page = ram_save_target_page_legacy; +- ret = multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(f); + if (ret < 0) { + return ret; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch b/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch new file mode 100644 index 0000000..52b19b3 --- /dev/null +++ b/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch @@ -0,0 +1,329 @@ +From ee566ec12099992f9134bda1db92dd568427245a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 18:26:59 +0100 +Subject: [PATCH 18/56] migration: rename enabled_capabilities to capabilities +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [17/50] 841a27addf273d8f559bc8ebd2c854200e8ca673 (peterx/qemu-kvm) + +It is clear from the context what that means, and such a long name +with the extra long names of the capabilities make very difficilut to +stay inside the 80 columns limit. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 0cec2056ff67557c18d7b8ab1b70ab47c9e31f2f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 52 +++++++++++++++++++++---------------------- + migration/migration.h | 2 +- + migration/rdma.c | 4 ++-- + migration/savevm.c | 6 ++--- + 4 files changed, 31 insertions(+), 33 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f1b3439e5f..d8e5fb6226 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -364,8 +364,7 @@ static bool migrate_late_block_activate(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[ +- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; ++ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + + /* +@@ -944,7 +943,7 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + #endif + caps = g_malloc0(sizeof(*caps)); + caps->capability = i; +- caps->state = s->enabled_capabilities[i]; ++ caps->state = s->capabilities[i]; + QAPI_LIST_APPEND(tail, caps); + } + +@@ -1494,13 +1493,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + return; + } + +- memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); ++ memcpy(cap_list, s->capabilities, sizeof(cap_list)); + if (!migrate_caps_check(cap_list, params, errp)) { + return; + } + + for (cap = params; cap; cap = cap->next) { +- s->enabled_capabilities[cap->value->capability] = cap->value->state; ++ s->capabilities[cap->value->capability] = cap->value->state; + } + } + +@@ -2569,7 +2568,7 @@ bool migrate_release_ram(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; ++ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + + bool migrate_postcopy_ram(void) +@@ -2578,7 +2577,7 @@ bool migrate_postcopy_ram(void) + + s = migrate_get_current(); + +- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } + + bool migrate_postcopy(void) +@@ -2592,7 +2591,7 @@ bool migrate_auto_converge(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; ++ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; + } + + bool migrate_zero_blocks(void) +@@ -2601,7 +2600,7 @@ bool migrate_zero_blocks(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } + + bool migrate_postcopy_blocktime(void) +@@ -2610,7 +2609,7 @@ bool migrate_postcopy_blocktime(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; + } + + bool migrate_use_compression(void) +@@ -2619,7 +2618,7 @@ bool migrate_use_compression(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; ++ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; + } + + int migrate_compress_level(void) +@@ -2664,7 +2663,7 @@ bool migrate_dirty_bitmaps(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; ++ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + + bool migrate_ignore_shared(void) +@@ -2673,7 +2672,7 @@ bool migrate_ignore_shared(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; ++ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; + } + + bool migrate_validate_uuid(void) +@@ -2682,7 +2681,7 @@ bool migrate_validate_uuid(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; ++ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + + bool 
migrate_use_events(void) +@@ -2691,7 +2690,7 @@ bool migrate_use_events(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; ++ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; + } + + bool migrate_use_multifd(void) +@@ -2700,7 +2699,7 @@ bool migrate_use_multifd(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; ++ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + + bool migrate_pause_before_switchover(void) +@@ -2709,8 +2708,7 @@ bool migrate_pause_before_switchover(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[ +- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; ++ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; + } + + int migrate_multifd_channels(void) +@@ -2757,7 +2755,7 @@ bool migrate_use_zero_copy_send(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } + #endif + +@@ -2776,7 +2774,7 @@ int migrate_use_xbzrle(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; ++ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; + } + + uint64_t migrate_xbzrle_cache_size(void) +@@ -2803,7 +2801,7 @@ bool migrate_use_block(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; ++ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; + } + + bool migrate_use_return_path(void) +@@ -2812,7 +2810,7 @@ bool migrate_use_return_path(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; ++ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + + bool migrate_use_block_incremental(void) +@@ -2830,7 +2828,7 @@ bool migrate_background_snapshot(void) + + s = migrate_get_current(); + +- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; ++ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + + bool migrate_postcopy_preempt(void) +@@ -2839,7 +2837,7 @@ bool migrate_postcopy_preempt(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; + } + + /* migration thread support */ +@@ -3584,7 +3582,7 @@ fail: + bool migrate_colo_enabled(void) + { + MigrationState *s = migrate_get_current(); +- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; ++ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + + typedef enum MigThrError { +@@ -4447,7 +4445,7 @@ void migration_global_dump(Monitor *mon) + } + + #define DEFINE_PROP_MIG_CAP(name, x) \ +- DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) ++ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) + + static Property migration_properties[] = { + DEFINE_PROP_BOOL("store-global-state", MigrationState, +@@ -4646,7 +4644,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp) + } + + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (ms->enabled_capabilities[i]) { ++ if (ms->capabilities[i]) { + QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); + } + } +diff --git a/migration/migration.h b/migration/migration.h +index 310ae8901b..04e0860b4e 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -310,7 +310,7 @@ struct MigrationState { + int64_t downtime_start; + int64_t downtime; + int64_t expected_downtime; +- bool enabled_capabilities[MIGRATION_CAPABILITY__MAX]; ++ bool capabilities[MIGRATION_CAPABILITY__MAX]; + int64_t setup_time; + /* + * Whether guest was running when we enter the completion stage. 
+diff --git a/migration/rdma.c b/migration/rdma.c +index df646be35e..f35f021963 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -4179,7 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma, +- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); + + if (ret) { + goto err; +@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma_return_path, +- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); + + if (ret) { + goto return_path_err; +diff --git a/migration/savevm.c b/migration/savevm.c +index aa54a67fda..589ef926ab 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -253,7 +253,7 @@ static uint32_t get_validatable_capabilities_count(void) + uint32_t result = 0; + int i; + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (should_validate_capability(i) && s->enabled_capabilities[i]) { ++ if (should_validate_capability(i) && s->capabilities[i]) { + result++; + } + } +@@ -275,7 +275,7 @@ static int configuration_pre_save(void *opaque) + state->capabilities = g_renew(MigrationCapability, state->capabilities, + state->caps_count); + for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (should_validate_capability(i) && s->enabled_capabilities[i]) { ++ if (should_validate_capability(i) && s->capabilities[i]) { + state->capabilities[j++] = i; + } + } +@@ -325,7 +325,7 @@ static bool configuration_validate_capabilities(SaveState *state) + continue; + } + source_state = test_bit(i, source_caps_bm); +- target_state = s->enabled_capabilities[i]; ++ target_state = s->capabilities[i]; + if (source_state != target_state) { + error_report("Capability %s is %s, but received capability is %s", + MigrationCapability_str(i), +-- +2.39.1 + diff --git 
a/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch b/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch new file mode 100644 index 0000000..0bebd2e --- /dev/null +++ b/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch @@ -0,0 +1,127 @@ +From 2a5ea92ca0a5dffad54e4d06a683f683996cea9a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 21 Jun 2022 12:13:14 +0200 +Subject: [PATCH 05/12] multifd: Create property + multifd-flush-after-each-section +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: quintela1 +RH-MergeRequest: 186: Multifd flushes its channels 10 times per second +RH-Bugzilla: 2196295 +RH-Acked-by: Peter Xu +RH-Acked-by: Leonardo Brás +RH-Commit: [1/3] 5bf5348e8be5b1d1629b859ce1ddb7aa0d72c0d6 (juan.quintela/c9s-qemu-kvm) + +We used to flush all channels at the end of each RAM section +sent. That is not needed, so preparing to only flush after a full +iteration through all the RAM. + +Default value of the property is false. But we return "true" in +migrate_multifd_flush_after_each_section() until we implement the code +in following patches. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +Acked-by: Peter Xu + +--- + +Rename each-iteration to after-each-section +Rename multifd-sync-after-each-section to + multifd-flush-after-each-section +Move to machine-8.0 (peter) + +conflit hw_compat_8_0 and hw_compat_rhel_9_2 + +(cherry picked from commit 77c259a4cb1c9799754b48f570301ebf1de5ded8) +--- + hw/core/machine.c | 2 ++ + migration/migration.h | 12 ++++++++++++ + migration/options.c | 13 +++++++++++++ + migration/options.h | 1 + + 4 files changed, 28 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5abdc8c39b..5ea52317b9 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -54,6 +54,8 @@ const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + + GlobalProperty hw_compat_rhel_9_2[] = { ++ /* hw_compat_rhel_9_2 from hw_compat_8_0 */ ++ { "migration", "multifd-flush-after-each-section", "on"}, + /* hw_compat_rhel_9_2 from hw_compat_7_2 */ + { "e1000e", "migrate-timadj", "off" }, + /* hw_compat_rhel_9_2 from hw_compat_7_2 */ +diff --git a/migration/migration.h b/migration/migration.h +index 7ccf460aa2..04c78c1fd6 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -411,6 +411,18 @@ struct MigrationState { + */ + bool preempt_pre_7_2; + ++ /* ++ * flush every channel after each section sent. ++ * ++ * This assures that we can't mix pages from one iteration through ++ * ram pages with pages for the following iteration. We really ++ * only need to do this flush after we have go through all the ++ * dirty pages. For historical reasons, we do that after each ++ * section. This is suboptimal (we flush too many times). ++ * Default value is false. Setting this property has no effect ++ * until the patch that removes this comment. (since 8.1) ++ */ ++ bool multifd_flush_after_each_section; + /* + * This decides the size of guest memory chunk that will be used + * to track dirty bitmap clearing. 
The size of memory chunk will +diff --git a/migration/options.c b/migration/options.c +index ccd7ef3907..5b0d080ecb 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -88,6 +88,8 @@ Property migration_properties[] = { + send_section_footer, true), + DEFINE_PROP_BOOL("decompress-error-check", MigrationState, + decompress_error_check, true), ++ DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, ++ multifd_flush_after_each_section, true), + DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, + clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), + DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, +@@ -344,6 +346,17 @@ bool migrate_zero_copy_send(void) + + /* pseudo capabilities */ + ++bool migrate_multifd_flush_after_each_section(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ /* ++ * Until the patch that remove this comment, we always return that ++ * the property is enabled. ++ */ ++ return true || s->multifd_flush_after_each_section; ++} ++ + bool migrate_postcopy(void) + { + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); +diff --git a/migration/options.h b/migration/options.h +index 0fc7be6869..271f49ae5f 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -60,6 +60,7 @@ bool migrate_zero_copy_send(void); + * check, but they are not a capability. 
+ */ + ++bool migrate_multifd_flush_after_each_section(void); + bool migrate_postcopy(void); + bool migrate_tls(void); + +-- +2.39.3 + diff --git a/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch b/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch new file mode 100644 index 0000000..abf21e6 --- /dev/null +++ b/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch @@ -0,0 +1,58 @@ +From af6f2a543c7db6d67d33fd12615a50e57fc3fe66 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 26 Apr 2023 12:20:36 +0200 +Subject: [PATCH 19/21] multifd: Fix the number of channels ready +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 171: multifd: Fix the number of channels ready +RH-Bugzilla: 2196289 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] a5e271ba249d85b27a68d3cff10480ca3a112c5d (LeoBras/centos-qemu-kvm) + +We don't wait in the sem when we are doing a sync_main. Make it wait +there. To make things clearer, we mark the channel ready at the +begining of the thread loop. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit d2026ee117147893f8d80f060cede6d872ecbd7f) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cce3ad6988..6a59c03dd2 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -635,6 +635,7 @@ int multifd_send_sync_main(QEMUFile *f) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + ++ qemu_sem_wait(&multifd_send_state->channels_ready); + trace_multifd_send_sync_main_wait(p->id); + qemu_sem_wait(&p->sem_sync); + +@@ -668,6 +669,7 @@ static void *multifd_send_thread(void *opaque) + p->num_packets = 1; + + while (true) { ++ qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_wait(&p->sem); + + if (qatomic_read(&multifd_send_state->exiting)) { +@@ -736,7 +738,6 @@ static void *multifd_send_thread(void *opaque) + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&p->sem_sync); + } +- qemu_sem_post(&multifd_send_state->channels_ready); + } else if (p->quit) { + qemu_mutex_unlock(&p->mutex); + break; +-- +2.39.3 + diff --git a/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch b/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch new file mode 100644 index 0000000..3f76384 --- /dev/null +++ b/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch @@ -0,0 +1,166 @@ +From e6f770506091eada46c63ac1c8b934b508e3807f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 21 Jun 2022 13:36:11 +0200 +Subject: [PATCH 07/12] multifd: Only flush once each full round of memory +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: quintela1 +RH-MergeRequest: 186: Multifd flushes its channels 10 times per second +RH-Bugzilla: 2196295 +RH-Acked-by: Peter Xu +RH-Acked-by: Leonardo Brás +RH-Commit: 
[3/3] 33f76dfc72a2552a42dc7f0fe3923564185a7bf7 (juan.quintela/c9s-qemu-kvm) + +We need to add a new flag to mean to flush at that point. +Notice that we still flush at the end of setup and at the end of +complete stages. + +Signed-off-by: Juan Quintela +Acked-by: Peter Xu + +--- + +Add missing qemu_fflush(), now it passes all tests always. +In the previous version, the check that changes the default value to +false got lost in some rebase. Get it back. + +(cherry picked from commit 294e5a4034e81b3d8db03b4e0f691386f20d6ed3) +--- + migration/migration.h | 3 +-- + migration/options.c | 8 ++------ + migration/ram.c | 28 +++++++++++++++++++++++++++- + 3 files changed, 30 insertions(+), 9 deletions(-) + +diff --git a/migration/migration.h b/migration/migration.h +index 04c78c1fd6..dfec649af8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -419,8 +419,7 @@ struct MigrationState { + * only need to do this flush after we have go through all the + * dirty pages. For historical reasons, we do that after each + * section. This is suboptimal (we flush too many times). +- * Default value is false. Setting this property has no effect +- * until the patch that removes this comment. (since 8.1) ++ * Default value is false. 
(since 8.1) + */ + bool multifd_flush_after_each_section; + /* +diff --git a/migration/options.c b/migration/options.c +index 5b0d080ecb..e13c7cb8e5 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -89,7 +89,7 @@ Property migration_properties[] = { + DEFINE_PROP_BOOL("decompress-error-check", MigrationState, + decompress_error_check, true), + DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, +- multifd_flush_after_each_section, true), ++ multifd_flush_after_each_section, false), + DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, + clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), + DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, +@@ -350,11 +350,7 @@ bool migrate_multifd_flush_after_each_section(void) + { + MigrationState *s = migrate_get_current(); + +- /* +- * Until the patch that remove this comment, we always return that +- * the property is enabled. +- */ +- return true || s->multifd_flush_after_each_section; ++ return s->multifd_flush_after_each_section; + } + + bool migrate_postcopy(void) +diff --git a/migration/ram.c b/migration/ram.c +index 1e2414d681..e9dcda8b9d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -86,6 +86,7 @@ + #define RAM_SAVE_FLAG_XBZRLE 0x40 + /* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */ + #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 ++#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 + /* We can't use any flag that is bigger than 0x200 */ + + int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, +@@ -1581,6 +1582,7 @@ retry: + * associated with the search process. 
+ * + * Returns: ++ * <0: An error happened + * PAGE_ALL_CLEAN: no dirty page found, give up + * PAGE_TRY_AGAIN: no dirty page found, retry for next block + * PAGE_DIRTY_FOUND: dirty page found +@@ -1608,6 +1610,15 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) + pss->page = 0; + pss->block = QLIST_NEXT_RCU(pss->block, next); + if (!pss->block) { ++ if (!migrate_multifd_flush_after_each_section()) { ++ QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; ++ int ret = multifd_send_sync_main(f); ++ if (ret < 0) { ++ return ret; ++ } ++ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); ++ qemu_fflush(f); ++ } + /* + * If memory migration starts over, we will meet a dirtied page + * which may still exists in compression threads's ring, so we +@@ -2600,6 +2611,9 @@ static int ram_find_and_save_block(RAMState *rs) + break; + } else if (res == PAGE_TRY_AGAIN) { + continue; ++ } else if (res < 0) { ++ pages = res; ++ break; + } + } + } +@@ -3286,6 +3300,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + return ret; + } + ++ if (!migrate_multifd_flush_after_each_section()) { ++ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3471,6 +3489,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + return ret; + } + ++ if (!migrate_multifd_flush_after_each_section()) { ++ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); ++ } + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -4152,7 +4173,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) + } + decompress_data_with_multi_threads(f, page_buffer, len); + break; +- ++ case RAM_SAVE_FLAG_MULTIFD_FLUSH: ++ multifd_recv_sync_main(); ++ break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ + if (migrate_multifd_flush_after_each_section()) { +@@ -4426,6 +4449,9 @@ static int ram_load_precopy(QEMUFile *f) + break; + } + break; ++ case RAM_SAVE_FLAG_MULTIFD_FLUSH: ++ multifd_recv_sync_main(); ++ break; + case 
RAM_SAVE_FLAG_EOS: + /* normal exit */ + if (migrate_multifd_flush_after_each_section()) { +-- +2.39.3 + diff --git a/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch b/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch new file mode 100644 index 0000000..779841f --- /dev/null +++ b/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch @@ -0,0 +1,78 @@ +From c4bfb4900b95e13bef2d86b83c33786c7c4f6289 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 21 Jun 2022 12:21:32 +0200 +Subject: [PATCH 06/12] multifd: Protect multifd_send_sync_main() calls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: quintela1 +RH-MergeRequest: 186: Multifd flushes its channels 10 times per second +RH-Bugzilla: 2196295 +RH-Acked-by: Peter Xu +RH-Acked-by: Leonardo Brás +RH-Commit: [2/3] a91adf59c6b2f39bf4a308f566b00e39cae6e0ae (juan.quintela/c9s-qemu-kvm) + +We only need to do that on the ram_save_iterate() call on sending and +on destination when we get a RAM_SAVE_FLAG_EOS. + +In setup() and complete() we need to synch in both new and old cases, +so don't add a check there. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +Acked-by: Peter Xu + +--- + +Remove the wrappers that we take out on patch 5. 
+ +(cherry picked from commit b05292c237030343516d073b1a1e5f49ffc017a8) +--- + migration/ram.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 01356f60a4..1e2414d681 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3394,9 +3394,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { +- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); +- if (ret < 0) { +- return ret; ++ if (migrate_multifd_flush_after_each_section()) { ++ ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); ++ if (ret < 0) { ++ return ret; ++ } + } + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +@@ -4153,7 +4155,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) + + case RAM_SAVE_FLAG_EOS: + /* normal exit */ +- multifd_recv_sync_main(); ++ if (migrate_multifd_flush_after_each_section()) { ++ multifd_recv_sync_main(); ++ } + break; + default: + error_report("Unknown combination of migration flags: 0x%x" +@@ -4424,7 +4428,9 @@ static int ram_load_precopy(QEMUFile *f) + break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ +- multifd_recv_sync_main(); ++ if (migrate_multifd_flush_after_each_section()) { ++ multifd_recv_sync_main(); ++ } + break; + default: + if (flags & RAM_SAVE_FLAG_HOOK) { +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch b/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch new file mode 100644 index 0000000..214b6dd --- /dev/null +++ b/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch @@ -0,0 +1,159 @@ +From 639f65d2cd4c6627a1d22c4b418b41400fe40154 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:33 +0200 +Subject: [PATCH 03/21] nbd/server: Fix drained_poll to wake coroutine in right + AioContext + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: 
block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 177092e61360c2feb04377890b32fdeb2d1cfefc (kmwolf/centos-qemu-kvm) + +nbd_drained_poll() generally runs in the main thread, not whatever +iothread the NBD server coroutine is meant to run in, so it can't +directly reenter the coroutines to wake them up. + +The code seems to have the right intention, it specifies the correct +AioContext when it calls qemu_aio_coroutine_enter(). However, this +functions doesn't schedule the coroutine to run in that AioContext, but +it assumes it is already called in the home thread of the AioContext. + +To fix this, add a new thread-safe qio_channel_wake_read() that can be +called in the main thread to wake up the coroutine in its AioContext, +and use this in nbd_drained_poll(). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-3-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 7c1f51bf38de8cea4ed5030467646c37b46edeb7) +Signed-off-by: Kevin Wolf +--- + include/io/channel.h | 10 ++++++++++ + io/channel.c | 33 +++++++++++++++++++++++++++------ + nbd/server.c | 3 +-- + 3 files changed, 38 insertions(+), 8 deletions(-) + +diff --git a/include/io/channel.h b/include/io/channel.h +index 153fbd2904..2b905423a9 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc); + void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition); + ++/** ++ * qio_channel_wake_read: ++ * @ioc: the channel object ++ * ++ * If qio_channel_yield() is currently waiting for the channel to become ++ * readable, interrupt it and reenter immediately. This function is safe to call ++ * from any thread. 
++ */ ++void qio_channel_wake_read(QIOChannel *ioc); ++ + /** + * qio_channel_wait: + * @ioc: the channel object +diff --git a/io/channel.c b/io/channel.c +index a8c7f11649..3c9b7beb65 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include "block/aio-wait.h" + #include "io/channel.h" + #include "qapi/error.h" + #include "qemu/main-loop.h" +@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, + static void qio_channel_restart_read(void *opaque) + { + QIOChannel *ioc = opaque; +- Coroutine *co = ioc->read_coroutine; ++ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); ++ ++ if (!co) { ++ return; ++ } + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == +@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) + static void qio_channel_restart_write(void *opaque) + { + QIOChannel *ioc = opaque; +- Coroutine *co = ioc->write_coroutine; ++ Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); ++ ++ if (!co) { ++ return; ++ } + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == +@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) + void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition) + { ++ AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); ++ + assert(qemu_in_coroutine()); ++ assert(in_aio_context_home_thread(ioc_ctx)); ++ + if (condition == G_IO_IN) { + assert(!ioc->read_coroutine); + ioc->read_coroutine = qemu_coroutine_self(); +@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, + } + qio_channel_set_aio_fd_handlers(ioc); + qemu_coroutine_yield(); ++ assert(in_aio_context_home_thread(ioc_ctx)); + + /* Allow interrupting the operation by reentering the coroutine other than + * through the aio_fd_handlers. 
*/ +- if (condition == G_IO_IN && ioc->read_coroutine) { +- ioc->read_coroutine = NULL; ++ if (condition == G_IO_IN) { ++ assert(ioc->read_coroutine == NULL); + qio_channel_set_aio_fd_handlers(ioc); +- } else if (condition == G_IO_OUT && ioc->write_coroutine) { +- ioc->write_coroutine = NULL; ++ } else if (condition == G_IO_OUT) { ++ assert(ioc->write_coroutine == NULL); + qio_channel_set_aio_fd_handlers(ioc); + } + } + ++void qio_channel_wake_read(QIOChannel *ioc) ++{ ++ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); ++ if (co) { ++ aio_co_wake(co); ++ } ++} + + static gboolean qio_channel_wait_complete(QIOChannel *ioc, + GIOCondition condition, +diff --git a/nbd/server.c b/nbd/server.c +index 3d8d0d81df..ea47522e8f 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) + * enter it here so we don't depend on the client to wake it up. + */ + if (client->recv_coroutine != NULL && client->read_yielding) { +- qemu_aio_coroutine_enter(exp->common.ctx, +- client->recv_coroutine); ++ qio_channel_wake_read(client->ioc); + } + + return true; +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch b/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch new file mode 100644 index 0000000..20b9c04 --- /dev/null +++ b/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch @@ -0,0 +1,78 @@ +From d6b3f9e4b388b8d621761104ddf075d6087f6d6c Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 9 Jun 2023 09:27:47 +0200 +Subject: [PATCH 09/12] net: socket: move fd type checking to its own function + +RH-Author: Laurent Vivier +RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket +RH-Jira: RHEL-582 +RH-Acked-by: Stefano Brivio +RH-Acked-by: Jason Wang +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [2/3] 9726f0ae81ac209b5db33dc7767f652867d8ca0a (lvivier/qemu-kvm-centos) + +JIRA: 
https://issues.redhat.com/browse/RHEL-582 + +Reviewed-by: David Gibson +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit 23455ae341656ca867ee4a171826b9d280d6acb5) +--- + net/socket.c | 28 ++++++++++++++++++++-------- + 1 file changed, 20 insertions(+), 8 deletions(-) + +diff --git a/net/socket.c b/net/socket.c +index 24dcaa55bc..6b1f0fec3a 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -446,16 +446,32 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, + return s; + } + ++static int net_socket_fd_check(int fd, Error **errp) ++{ ++ int so_type, optlen = sizeof(so_type); ++ ++ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, ++ (socklen_t *)&optlen) < 0) { ++ error_setg(errp, "can't get socket option SO_TYPE"); ++ return -1; ++ } ++ if (so_type != SOCK_DGRAM && so_type != SOCK_STREAM) { ++ error_setg(errp, "socket type=%d for fd=%d must be either" ++ " SOCK_DGRAM or SOCK_STREAM", so_type, fd); ++ return -1; ++ } ++ return so_type; ++} ++ + static NetSocketState *net_socket_fd_init(NetClientState *peer, + const char *model, const char *name, + int fd, int is_connected, + const char *mc, Error **errp) + { +- int so_type = -1, optlen=sizeof(so_type); ++ int so_type; + +- if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, +- (socklen_t *)&optlen)< 0) { +- error_setg(errp, "can't get socket option SO_TYPE"); ++ so_type = net_socket_fd_check(fd, errp); ++ if (so_type < 0) { + close(fd); + return NULL; + } +@@ -465,10 +481,6 @@ static NetSocketState *net_socket_fd_init(NetClientState *peer, + mc, errp); + case SOCK_STREAM: + return net_socket_fd_init_stream(peer, model, name, fd, is_connected); +- default: +- error_setg(errp, "socket type=%d for fd=%d must be either" +- " SOCK_DGRAM or SOCK_STREAM", so_type, fd); +- close(fd); + } + return NULL; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch 
b/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch new file mode 100644 index 0000000..269da29 --- /dev/null +++ b/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch @@ -0,0 +1,60 @@ +From a467540e49e76c5961d86e3f47d3f8fcad8cef09 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 9 Jun 2023 09:27:46 +0200 +Subject: [PATCH 08/12] net: socket: prepare to cleanup net_init_socket() + +RH-Author: Laurent Vivier +RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket +RH-Jira: RHEL-582 +RH-Acked-by: Stefano Brivio +RH-Acked-by: Jason Wang +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [1/3] 3e4f8370586ae1ac2474fef971a239edb31eeb67 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-582 + +Use directly net_socket_fd_init_stream() and net_socket_fd_init_dgram() +when the socket type is already known. + +Reviewed-by: David Gibson +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit 006c3fa74c3edb978ff46d2851699e9a95609da5) +--- + net/socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/socket.c b/net/socket.c +index ba6e5b0b00..24dcaa55bc 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -587,7 +587,7 @@ static int net_socket_connect_init(NetClientState *peer, + break; + } + } +- s = net_socket_fd_init(peer, model, name, fd, connected, NULL, errp); ++ s = net_socket_fd_init_stream(peer, model, name, fd, connected); + if (!s) { + return -1; + } +@@ -629,7 +629,7 @@ static int net_socket_mcast_init(NetClientState *peer, + return -1; + } + +- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); ++ s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); + if (!s) { + return -1; + } +@@ -683,7 +683,7 @@ static int net_socket_udp_init(NetClientState *peer, + } + qemu_socket_set_nonblock(fd); + +- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); ++ s = net_socket_fd_init_dgram(peer, model, 
name, fd, 0, NULL, errp); + if (!s) { + return -1; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-remove-net_init_socket.patch b/SOURCES/kvm-net-socket-remove-net_init_socket.patch new file mode 100644 index 0000000..98c96f2 --- /dev/null +++ b/SOURCES/kvm-net-socket-remove-net_init_socket.patch @@ -0,0 +1,102 @@ +From ecb4f97895849c562112b76a30ddc2037e8df79e Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 9 Jun 2023 09:27:48 +0200 +Subject: [PATCH 10/12] net: socket: remove net_init_socket() + +RH-Author: Laurent Vivier +RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket +RH-Jira: RHEL-582 +RH-Acked-by: Stefano Brivio +RH-Acked-by: Jason Wang +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [3/3] e1d7939f5df4a77c2fff62d1ae4899a7a3615ad9 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-582 + +Move the file descriptor type checking before doing anything with it. +If it's not usable, don't close it as it could be in use by another +part of QEMU, only fail and report an error. 
+ +Reviewed-by: David Gibson +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit b6aeee02980e193f744f74c48fd900940feb2799) +--- + net/socket.c | 43 +++++++++++++++++-------------------------- + 1 file changed, 17 insertions(+), 26 deletions(-) + +diff --git a/net/socket.c b/net/socket.c +index 6b1f0fec3a..8e3702e1f3 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -463,28 +463,6 @@ static int net_socket_fd_check(int fd, Error **errp) + return so_type; + } + +-static NetSocketState *net_socket_fd_init(NetClientState *peer, +- const char *model, const char *name, +- int fd, int is_connected, +- const char *mc, Error **errp) +-{ +- int so_type; +- +- so_type = net_socket_fd_check(fd, errp); +- if (so_type < 0) { +- close(fd); +- return NULL; +- } +- switch(so_type) { +- case SOCK_DGRAM: +- return net_socket_fd_init_dgram(peer, model, name, fd, is_connected, +- mc, errp); +- case SOCK_STREAM: +- return net_socket_fd_init_stream(peer, model, name, fd, is_connected); +- } +- return NULL; +-} +- + static void net_socket_accept(void *opaque) + { + NetSocketState *s = opaque; +@@ -728,21 +706,34 @@ int net_init_socket(const Netdev *netdev, const char *name, + } + + if (sock->fd) { +- int fd, ret; ++ int fd, ret, so_type; + + fd = monitor_fd_param(monitor_cur(), sock->fd, errp); + if (fd == -1) { + return -1; + } ++ so_type = net_socket_fd_check(fd, errp); ++ if (so_type < 0) { ++ return -1; ++ } + ret = qemu_socket_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } +- if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast, +- errp)) { +- return -1; ++ switch (so_type) { ++ case SOCK_DGRAM: ++ if (!net_socket_fd_init_dgram(peer, "socket", name, fd, 1, ++ sock->mcast, errp)) { ++ return -1; ++ } ++ break; ++ case SOCK_STREAM: ++ if (!net_socket_fd_init_stream(peer, "socket", name, fd, 1)) { ++ return -1; ++ } ++ break; + } + return 0; + } +-- 
+2.39.3 + diff --git a/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch b/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch deleted file mode 100644 index 707c80f..0000000 --- a/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch +++ /dev/null @@ -1,325 +0,0 @@ -From e5834364958a3914d7b8b46b985a1b054728b466 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 19 Jan 2023 11:16:45 +0100 -Subject: [PATCH 2/8] net: stream: add a new option to automatically reconnect -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect -RH-Bugzilla: 2169232 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Cindy Lu -RH-Acked-by: MST -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [2/2] 9b87647a9ed2e7c1b91bdfa9d0a736e091c892a5 (lvivier/qemu-kvm-centos) - -In stream mode, if the server shuts down there is currently -no way to reconnect the client to a new server without removing -the NIC device and the netdev backend (or to reboot). - -This patch introduces a reconnect option that specifies a delay -to try to reconnect with the same parameters. - -Add a new test in qtest to test the reconnect option and the -connect/disconnect events. 
- -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit b95c0d4440950fba6dbef0f781962911fa42abdb) ---- - net/stream.c | 53 ++++++++++++++++++- - qapi/net.json | 7 ++- - qemu-options.hx | 6 +-- - tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++ - 4 files changed, 162 insertions(+), 5 deletions(-) - -diff --git a/net/stream.c b/net/stream.c -index 37ff727e0c..9204b4c96e 100644 ---- a/net/stream.c -+++ b/net/stream.c -@@ -39,6 +39,8 @@ - #include "io/channel-socket.h" - #include "io/net-listener.h" - #include "qapi/qapi-events-net.h" -+#include "qapi/qapi-visit-sockets.h" -+#include "qapi/clone-visitor.h" - - typedef struct NetStreamState { - NetClientState nc; -@@ -49,11 +51,15 @@ typedef struct NetStreamState { - guint ioc_write_tag; - SocketReadState rs; - unsigned int send_index; /* number of bytes sent*/ -+ uint32_t reconnect; -+ guint timer_tag; -+ SocketAddress *addr; - } NetStreamState; - - static void net_stream_listen(QIONetListener *listener, - QIOChannelSocket *cioc, - void *opaque); -+static void net_stream_arm_reconnect(NetStreamState *s); - - static gboolean net_stream_writable(QIOChannel *ioc, - GIOCondition condition, -@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc, - qemu_set_info_str(&s->nc, "%s", ""); - - qapi_event_send_netdev_stream_disconnected(s->nc.name); -+ net_stream_arm_reconnect(s); - - return G_SOURCE_REMOVE; - } -@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc, - static void net_stream_cleanup(NetClientState *nc) - { - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); -+ if (s->timer_tag) { -+ g_source_remove(s->timer_tag); -+ s->timer_tag = 0; -+ } -+ if (s->addr) { -+ qapi_free_SocketAddress(s->addr); -+ s->addr = NULL; -+ } - if (s->ioc) { - if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { - if (s->ioc_read_tag) { -@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque) - error: - 
object_unref(OBJECT(s->ioc)); - s->ioc = NULL; -+ net_stream_arm_reconnect(s); -+} -+ -+static gboolean net_stream_reconnect(gpointer data) -+{ -+ NetStreamState *s = data; -+ QIOChannelSocket *sioc; -+ -+ s->timer_tag = 0; -+ -+ sioc = qio_channel_socket_new(); -+ s->ioc = QIO_CHANNEL(sioc); -+ qio_channel_socket_connect_async(sioc, s->addr, -+ net_stream_client_connected, s, -+ NULL, NULL); -+ return G_SOURCE_REMOVE; -+} -+ -+static void net_stream_arm_reconnect(NetStreamState *s) -+{ -+ if (s->reconnect && s->timer_tag == 0) { -+ s->timer_tag = g_timeout_add_seconds(s->reconnect, -+ net_stream_reconnect, s); -+ } - } - - static int net_stream_client_init(NetClientState *peer, - const char *model, - const char *name, - SocketAddress *addr, -+ uint32_t reconnect, - Error **errp) - { - NetStreamState *s; -@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer, - s->ioc = QIO_CHANNEL(sioc); - s->nc.link_down = true; - -+ s->reconnect = reconnect; -+ if (reconnect) { -+ s->addr = QAPI_CLONE(SocketAddress, addr); -+ } - qio_channel_socket_connect_async(sioc, addr, - net_stream_client_connected, s, - NULL, NULL); -@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name, - sock = &netdev->u.stream; - - if (!sock->has_server || !sock->server) { -- return net_stream_client_init(peer, "stream", name, sock->addr, errp); -+ return net_stream_client_init(peer, "stream", name, sock->addr, -+ sock->has_reconnect ? 
sock->reconnect : 0, -+ errp); -+ } -+ if (sock->has_reconnect) { -+ error_setg(errp, "'reconnect' option is incompatible with " -+ "socket in server mode"); -+ return -1; - } - return net_stream_server_init(peer, "stream", name, sock->addr, errp); - } -diff --git a/qapi/net.json b/qapi/net.json -index 522ac582ed..d6eb30008b 100644 ---- a/qapi/net.json -+++ b/qapi/net.json -@@ -585,6 +585,10 @@ - # @addr: socket address to listen on (server=true) - # or connect to (server=false) - # @server: create server socket (default: false) -+# @reconnect: For a client socket, if a socket is disconnected, -+# then attempt a reconnect after the given number of seconds. -+# Setting this to zero disables this function. (default: 0) -+# (since 8.0) - # - # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. - # -@@ -593,7 +597,8 @@ - { 'struct': 'NetdevStreamOptions', - 'data': { - 'addr': 'SocketAddress', -- '*server': 'bool' } } -+ '*server': 'bool', -+ '*reconnect': 'uint32' } } - - ## - # @NetdevDgramOptions: -diff --git a/qemu-options.hx b/qemu-options.hx -index ea02ca3a45..48eef4aa2c 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -2766,9 +2766,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, - "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" - " configure a network backend to connect to another network\n" - " using an UDP tunnel\n" -- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" -- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" -- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" -+ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" -+ "-netdev 
stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" -+ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" - " configure a network backend to connect to another network\n" - " using a socket connection in stream mode.\n" - "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" -diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c -index 6ba256e173..acc32c378b 100644 ---- a/tests/qtest/netdev-socket.c -+++ b/tests/qtest/netdev-socket.c -@@ -11,6 +11,10 @@ - #include - #include "../unit/socket-helpers.h" - #include "libqtest.h" -+#include "qapi/qmp/qstring.h" -+#include "qemu/sockets.h" -+#include "qapi/qobject-input-visitor.h" -+#include "qapi/qapi-visit-sockets.h" - - #define CONNECTION_TIMEOUT 5 - -@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void) - qtest_quit(qts0); - } - -+static void wait_stream_connected(QTestState *qts, const char *id, -+ SocketAddress **addr) -+{ -+ QDict *resp, *data; -+ QString *qstr; -+ QObject *obj; -+ Visitor *v = NULL; -+ -+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED"); -+ g_assert_nonnull(resp); -+ data = qdict_get_qdict(resp, "data"); -+ g_assert_nonnull(data); -+ -+ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); -+ g_assert_nonnull(data); -+ -+ g_assert(!strcmp(qstring_get_str(qstr), id)); -+ -+ obj = qdict_get(data, "addr"); -+ -+ v = qobject_input_visitor_new(obj); -+ visit_type_SocketAddress(v, NULL, addr, NULL); -+ visit_free(v); -+ qobject_unref(resp); -+} -+ -+static void wait_stream_disconnected(QTestState *qts, const char *id) -+{ -+ QDict *resp, *data; -+ QString *qstr; -+ -+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED"); -+ g_assert_nonnull(resp); -+ data = qdict_get_qdict(resp, "data"); -+ g_assert_nonnull(data); -+ -+ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); -+ 
g_assert_nonnull(data); -+ -+ g_assert(!strcmp(qstring_get_str(qstr), id)); -+ qobject_unref(resp); -+} -+ -+static void test_stream_inet_reconnect(void) -+{ -+ QTestState *qts0, *qts1; -+ int port; -+ SocketAddress *addr; -+ -+ port = inet_get_free_port(false); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off,reconnect=1," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ wait_stream_connected(qts0, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ qapi_free_SocketAddress(addr); -+ -+ /* kill server */ -+ qtest_quit(qts0); -+ -+ /* check client has been disconnected */ -+ wait_stream_disconnected(qts1, "st0"); -+ -+ /* restart server */ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ /* wait connection events*/ -+ wait_stream_connected(qts0, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ qapi_free_SocketAddress(addr); -+ -+ wait_stream_connected(qts1, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ g_assert_cmpint(atoi(addr->u.inet.port), ==, port); -+ qapi_free_SocketAddress(addr); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ - static void test_stream_inet_ipv6(void) - { - QTestState *qts0, *qts1; -@@ -418,6 +517,8 @@ int main(int argc, char **argv) - #ifndef _WIN32 - qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); - #endif -+ 
qtest_add_func("/netdev/stream/inet/reconnect", -+ test_stream_inet_reconnect); - } - if (has_ipv6) { - qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); --- -2.31.1 - diff --git a/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch b/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch new file mode 100644 index 0000000..66d68f1 --- /dev/null +++ b/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch @@ -0,0 +1,145 @@ +From 760a2f284f6d4cd3cd3b1685411bbca21c4ad233 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 1/6] numa: Validate cluster and NUMA node boundary if required +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [1/3] 24580064b9a0076ec4d9a916839d85135ac48cd9 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 + +For some architectures like ARM64, multiple CPUs in one cluster can be +associated with different NUMA nodes, which is irregular configuration +because we shouldn't have this in baremetal environment. The irregular +configuration causes Linux guest to misbehave, as the following warning +messages indicate. 
+ + -smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \ + -numa node,nodeid=0,cpus=0-1,memdev=ram0 \ + -numa node,nodeid=1,cpus=2-3,memdev=ram1 \ + -numa node,nodeid=2,cpus=4-5,memdev=ram2 \ + + ------------[ cut here ]------------ + WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910 + Modules linked in: + CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1 + pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : build_sched_domains+0x284/0x910 + lr : build_sched_domains+0x184/0x910 + sp : ffff80000804bd50 + x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000 + x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840 + x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508 + x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014 + x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e + x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0 + x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041 + x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001 + x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002 + x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001 + Call trace: + build_sched_domains+0x284/0x910 + sched_init_domains+0xac/0xe0 + sched_init_smp+0x48/0xc8 + kernel_init_freeable+0x140/0x1ac + kernel_init+0x28/0x140 + ret_from_fork+0x10/0x20 + +Improve the situation to warn when multiple CPUs in one cluster have +been associated with different NUMA nodes. However, one NUMA node is +allowed to be associated with different clusters. 
+ +Signed-off-by: Gavin Shan +Acked-by: Philippe Mathieu-Daudé +Acked-by: Igor Mammedov +Message-Id: <20230509002739.18388-2-gshan@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a494fdb715832000ee9047a549a35aacfea8175e) +Signed-off-by: Gavin Shan +--- + hw/core/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + include/hw/boards.h | 1 + + 2 files changed, 43 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index c28702b690..5abdc8c39b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1496,6 +1496,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine) + g_string_free(s, true); + } + ++static void validate_cpu_cluster_to_numa_boundary(MachineState *ms) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(ms); ++ NumaState *state = ms->numa_state; ++ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); ++ const CPUArchId *cpus = possible_cpus->cpus; ++ int i, j; ++ ++ if (state->num_nodes <= 1 || possible_cpus->len <= 1) { ++ return; ++ } ++ ++ /* ++ * The Linux scheduling domain can't be parsed when the multiple CPUs ++ * in one cluster have been associated with different NUMA nodes. However, ++ * it's fine to associate one NUMA node with CPUs in different clusters. ++ */ ++ for (i = 0; i < possible_cpus->len; i++) { ++ for (j = i + 1; j < possible_cpus->len; j++) { ++ if (cpus[i].props.has_socket_id && ++ cpus[i].props.has_cluster_id && ++ cpus[i].props.has_node_id && ++ cpus[j].props.has_socket_id && ++ cpus[j].props.has_cluster_id && ++ cpus[j].props.has_node_id && ++ cpus[i].props.socket_id == cpus[j].props.socket_id && ++ cpus[i].props.cluster_id == cpus[j].props.cluster_id && ++ cpus[i].props.node_id != cpus[j].props.node_id) { ++ warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64 ++ " have been associated with node-%" PRId64 " and node-%" PRId64 ++ " respectively. 
It can cause OSes like Linux to" ++ " misbehave", i, j, cpus[i].props.socket_id, ++ cpus[i].props.cluster_id, cpus[i].props.node_id, ++ cpus[j].props.node_id); ++ } ++ } ++ } ++} ++ + MemoryRegion *machine_consume_memdev(MachineState *machine, + HostMemoryBackend *backend) + { +@@ -1581,6 +1620,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * + numa_complete_configuration(machine); + if (machine->numa_state->num_nodes) { + machine_numa_finish_cpu_init(machine); ++ if (machine_class->cpu_cluster_has_numa_boundary) { ++ validate_cpu_cluster_to_numa_boundary(machine); ++ } + } + } + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 5f08bd7550..3628671228 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -275,6 +275,7 @@ struct MachineClass { + bool nvdimm_supported; + bool numa_mem_supported; + bool auto_enable_numa; ++ bool cpu_cluster_has_numa_boundary; + SMPCompatProps smp_props; + const char *default_ram_id; + +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch b/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch new file mode 100644 index 0000000..312af68 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch @@ -0,0 +1,78 @@ +From 7495a51c586818925470fb247882f5ba0f7b0ffd Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 27 Jun 2023 09:47:03 +0200 +Subject: [PATCH 34/37] pc-bios/s390-ccw: Don't use __bss_start with the "larl" + instruction +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] 2483a50c0ed37fa29db649ec44220ac83c215698 (thuth/qemu-kvm-cs9) + +start.S currently cannot be compiled with Clang 16 and 
binutils 2.40: + + ld: start.o(.text+0x8): misaligned symbol `__bss_start' (0xc1e5) for + relocation R_390_PC32DBL + +According to the built-in linker script of ld, the symbol __bss_start +can actually point *before* the .bss section and does not need to have +any alignment, so in certain situations (like when using the internal +assembler of Clang), the __bss_start symbol can indeed be unaligned +and thus it is not suitable for being used with the "larl" instruction +that needs an address that is at least aligned to halfwords. +The problem went unnoticed so far since binutils <= 2.39 did not +check the alignment, but starting with binutils 2.40, such unaligned +addresses are now refused. + +Fix it by loading the address indirectly instead. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2216662 +Reported-by: Miroslav Rezanina +Suggested-by: Andreas Krebbel +Message-Id: <20230629104821.194859-8-thuth@redhat.com> +Reviewed-by: Claudio Imbrenda +Signed-off-by: Thomas Huth +(cherry picked from commit 7cd50cbe4ca3e2860b31b06ec92c17c54bd82d48) +--- + pc-bios/s390-ccw/start.S | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S +index abd6fe6639..22c1c296df 100644 +--- a/pc-bios/s390-ccw/start.S ++++ b/pc-bios/s390-ccw/start.S +@@ -19,7 +19,8 @@ _start: + larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ + + /* clear bss */ +- larl %r2,__bss_start ++ larl %r2,bss_start_literal /* __bss_start might be unaligned ... */ ++ lg %r2,0(%r2) /* ... so load it indirectly */ + larl %r3,_end + slgr %r3,%r2 /* get sizeof bss */ + ltgr %r3,%r3 /* bss empty? 
*/ +@@ -45,7 +46,6 @@ done: + memsetxc: + xc 0(1,%r1),0(%r1) + +- + /* + * void disabled_wait(void) + * +@@ -113,6 +113,8 @@ io_new_code: + br %r14 + + .align 8 ++bss_start_literal: ++ .quad __bss_start + disabled_wait_psw: + .quad 0x0002000180000000,0x0000000000000000 + enabled_wait_psw: +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch b/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch new file mode 100644 index 0000000..bd13187 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch @@ -0,0 +1,218 @@ +From 24bc8fc932ae1c88cc2e97f0f90786a7be411bb2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 27 Jun 2023 09:47:00 +0200 +Subject: [PATCH 32/37] pc-bios/s390-ccw: Fix indentation in start.S +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] cf8fa053602ce1cfac0b6efa67f491688d4f9348 (thuth/qemu-kvm-cs9) + +start.S is currently indented with a mixture of spaces and tabs, which +is quite ugly. QEMU coding style says indentation should be 4 spaces, +and this is also what we are using in the assembler files in the +tests/tcg/s390x/ folder already, so let's adjust start.S accordingly. 
+ +Reviewed-by: Cédric Le Goater +Message-Id: <20230627074703.99608-2-thuth@redhat.com> +Reviewed-by: Claudio Imbrenda +Reviewed-by: Eric Farman +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +(cherry picked from commit f52420fa4fd9f519dc42c20d2616aba4149adc25) +--- + pc-bios/s390-ccw/start.S | 136 +++++++++++++++++++-------------------- + 1 file changed, 68 insertions(+), 68 deletions(-) + +diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S +index 6072906df4..d29de09cc6 100644 +--- a/pc-bios/s390-ccw/start.S ++++ b/pc-bios/s390-ccw/start.S +@@ -10,37 +10,37 @@ + * directory. + */ + +- .globl _start ++ .globl _start + _start: + +- larl %r15, stack + 0x8000 /* Set up stack */ ++ larl %r15,stack + 0x8000 /* Set up stack */ + +- /* clear bss */ +- larl %r2, __bss_start +- larl %r3, _end +- slgr %r3, %r2 /* get sizeof bss */ +- ltgr %r3,%r3 /* bss empty? */ +- jz done +- aghi %r3,-1 +- srlg %r4,%r3,8 /* how many 256 byte chunks? */ +- ltgr %r4,%r4 +- lgr %r1,%r2 +- jz remainder ++ /* clear bss */ ++ larl %r2,__bss_start ++ larl %r3,_end ++ slgr %r3,%r2 /* get sizeof bss */ ++ ltgr %r3,%r3 /* bss empty? */ ++ jz done ++ aghi %r3,-1 ++ srlg %r4,%r3,8 /* how many 256 byte chunks? */ ++ ltgr %r4,%r4 ++ lgr %r1,%r2 ++ jz remainder + loop: +- xc 0(256,%r1),0(%r1) +- la %r1,256(%r1) +- brctg %r4,loop ++ xc 0(256,%r1),0(%r1) ++ la %r1,256(%r1) ++ brctg %r4,loop + remainder: +- larl %r2,memsetxc +- ex %r3,0(%r2) ++ larl %r2,memsetxc ++ ex %r3,0(%r2) + done: +- /* set up a pgm exception disabled wait psw */ +- larl %r2, disabled_wait_psw +- mvc 0x01d0(16), 0(%r2) +- j main /* And call C */ ++ /* set up a pgm exception disabled wait psw */ ++ larl %r2,disabled_wait_psw ++ mvc 0x01d0(16),0(%r2) ++ j main /* And call C */ + + memsetxc: +- xc 0(1,%r1),0(%r1) ++ xc 0(1,%r1),0(%r1) + + + /* +@@ -48,11 +48,11 @@ memsetxc: + * + * stops the current guest cpu. 
+ */ +- .globl disabled_wait ++ .globl disabled_wait + disabled_wait: +- larl %r1,disabled_wait_psw +- lpswe 0(%r1) +-1: j 1b ++ larl %r1,disabled_wait_psw ++ lpswe 0(%r1) ++1: j 1b + + + /* +@@ -60,61 +60,61 @@ disabled_wait: + * + * eats one sclp interrupt + */ +- .globl consume_sclp_int ++ .globl consume_sclp_int + consume_sclp_int: +- /* enable service interrupts in cr0 */ +- stctg %c0,%c0,0(%r15) +- oi 6(%r15),0x2 +- lctlg %c0,%c0,0(%r15) +- /* prepare external call handler */ +- larl %r1, external_new_code +- stg %r1, 0x1b8 +- larl %r1, external_new_mask +- mvc 0x1b0(8),0(%r1) +- /* load enabled wait PSW */ +- larl %r1, enabled_wait_psw +- lpswe 0(%r1) ++ /* enable service interrupts in cr0 */ ++ stctg %c0,%c0,0(%r15) ++ oi 6(%r15),0x2 ++ lctlg %c0,%c0,0(%r15) ++ /* prepare external call handler */ ++ larl %r1,external_new_code ++ stg %r1,0x1b8 ++ larl %r1,external_new_mask ++ mvc 0x1b0(8),0(%r1) ++ /* load enabled wait PSW */ ++ larl %r1,enabled_wait_psw ++ lpswe 0(%r1) + + /* + * void consume_io_int(void) + * + * eats one I/O interrupt + */ +- .globl consume_io_int ++ .globl consume_io_int + consume_io_int: +- /* enable I/O interrupts in cr6 */ +- stctg %c6,%c6,0(%r15) +- oi 4(%r15), 0xff +- lctlg %c6,%c6,0(%r15) +- /* prepare i/o call handler */ +- larl %r1, io_new_code +- stg %r1, 0x1f8 +- larl %r1, io_new_mask +- mvc 0x1f0(8),0(%r1) +- /* load enabled wait PSW */ +- larl %r1, enabled_wait_psw +- lpswe 0(%r1) ++ /* enable I/O interrupts in cr6 */ ++ stctg %c6,%c6,0(%r15) ++ oi 4(%r15), 0xff ++ lctlg %c6,%c6,0(%r15) ++ /* prepare i/o call handler */ ++ larl %r1,io_new_code ++ stg %r1,0x1f8 ++ larl %r1,io_new_mask ++ mvc 0x1f0(8),0(%r1) ++ /* load enabled wait PSW */ ++ larl %r1,enabled_wait_psw ++ lpswe 0(%r1) + + external_new_code: +- /* disable service interrupts in cr0 */ +- stctg %c0,%c0,0(%r15) +- ni 6(%r15),0xfd +- lctlg %c0,%c0,0(%r15) +- br %r14 ++ /* disable service interrupts in cr0 */ ++ stctg %c0,%c0,0(%r15) ++ ni 6(%r15),0xfd ++ lctlg 
%c0,%c0,0(%r15) ++ br %r14 + + io_new_code: +- /* disable I/O interrupts in cr6 */ +- stctg %c6,%c6,0(%r15) +- ni 4(%r15), 0x00 +- lctlg %c6,%c6,0(%r15) +- br %r14 ++ /* disable I/O interrupts in cr6 */ ++ stctg %c6,%c6,0(%r15) ++ ni 4(%r15),0x00 ++ lctlg %c6,%c6,0(%r15) ++ br %r14 + +- .align 8 ++ .align 8 + disabled_wait_psw: +- .quad 0x0002000180000000,0x0000000000000000 ++ .quad 0x0002000180000000,0x0000000000000000 + enabled_wait_psw: +- .quad 0x0302000180000000,0x0000000000000000 ++ .quad 0x0302000180000000,0x0000000000000000 + external_new_mask: +- .quad 0x0000000180000000 ++ .quad 0x0000000180000000 + io_new_mask: +- .quad 0x0000000180000000 ++ .quad 0x0000000180000000 +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch b/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch new file mode 100644 index 0000000..907fe43 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch @@ -0,0 +1,50 @@ +From b5b243cbbb897b236c08699529e13457e1e49924 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 22 Jun 2023 15:08:22 +0200 +Subject: [PATCH 31/37] pc-bios/s390-ccw/Makefile: Use -z noexecstack to + silence linker warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 04f6f83169f1c5545a0e2772b4babfc6a50bd5bf (thuth/qemu-kvm-cs9) + +Recent versions of ld complain when linking the s390-ccw bios: + + /usr/bin/ld: warning: start.o: missing .note.GNU-stack section implies + executable stack + /usr/bin/ld: NOTE: This behaviour is deprecated and will be removed in + a future version of the linker + +We can silence the warning by telling the linker to mark the stack +as not executable. 
+ +Message-Id: <20230622130822.396793-1-thuth@redhat.com> +Acked-by: Christian Borntraeger +Signed-off-by: Thomas Huth +(cherry picked from commit 442ef32ee5b6059a8f247fb2def9d449578d0a89) +--- + pc-bios/s390-ccw/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile +index 10e8f5cb63..2a590af4a9 100644 +--- a/pc-bios/s390-ccw/Makefile ++++ b/pc-bios/s390-ccw/Makefile +@@ -53,7 +53,7 @@ config-cc.mak: Makefile + $(call cc-option,-march=z900,-march=z10)) 3> config-cc.mak + -include config-cc.mak + +-LDFLAGS += -Wl,-pie -nostdlib ++LDFLAGS += -Wl,-pie -nostdlib -z noexecstack + + build-all: s390-ccw.img s390-netboot.img + +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch b/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch new file mode 100644 index 0000000..0c4ce6f --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch @@ -0,0 +1,59 @@ +From 2c52aebf90f28121a3e46a9305304406023b9747 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 27 Jun 2023 09:47:01 +0200 +Subject: [PATCH 33/37] pc-bios/s390-ccw: Provide space for initial stack frame + in start.S +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] c2f69ce5998861fe20b799bf0113def8cf0cd128 (thuth/qemu-kvm-cs9) + +Providing the space of a stack frame is the duty of the caller, +so we should reserve 160 bytes before jumping into the main function. +Otherwise the main() function might write past the stack array. 
+ +While we're at it, add a proper STACK_SIZE macro for the stack size +instead of using magic numbers (this is also required for the following +patch). + +Reviewed-by: Christian Borntraeger +Reviewed-by: Cédric Le Goater +Message-Id: <20230627074703.99608-3-thuth@redhat.com> +Reviewed-by: Eric Farman +Reviewed-by: Claudio Imbrenda +Reviewed-by: Marc Hartmayer +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +(cherry picked from commit 74fe98ee7fb3344dbd085d1fa32c0dc2fc2c831f) +--- + pc-bios/s390-ccw/start.S | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S +index d29de09cc6..abd6fe6639 100644 +--- a/pc-bios/s390-ccw/start.S ++++ b/pc-bios/s390-ccw/start.S +@@ -10,10 +10,13 @@ + * directory. + */ + ++#define STACK_SIZE 0x8000 ++#define STACK_FRAME_SIZE 160 ++ + .globl _start + _start: + +- larl %r15,stack + 0x8000 /* Set up stack */ ++ larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ + + /* clear bss */ + larl %r2,__bss_start +-- +2.39.3 + diff --git a/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch b/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch new file mode 100644 index 0000000..1ec1c82 --- /dev/null +++ b/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch @@ -0,0 +1,87 @@ +From 2732b6c5ef249d3ec9affca66768cc2fc476ff7c Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Thu, 6 Jul 2023 01:55:47 -0300 +Subject: [PATCH 11/12] pcie: Add hotplug detect state register to cmask +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 188: pcie: Add hotplug detect state register to cmask +RH-Bugzilla: 2215819 +RH-Acked-by: Peter Xu +RH-Acked-by: quintela1 +RH-Commit: [1/1] a125fa337711bddbc957c399044393e82272b143 (LeoBras/centos-qemu-kvm) + +When trying to migrate a machine type pc-q35-6.0 or lower, with this +cmdline options, 
+ +-device driver=pcie-root-port,port=18,chassis=19,id=pcie-root-port18,bus=pcie.0,addr=0x12 \ +-device driver=nec-usb-xhci,p2=4,p3=4,id=nex-usb-xhci0,bus=pcie-root-port18,addr=0x12.0x1 + +the following bug happens after all ram pages were sent: + +qemu-kvm: get_pci_config_device: Bad config data: i=0x6e read: 0 device: 40 cmask: ff wmask: 0 w1cmask:19 +qemu-kvm: Failed to load PCIDevice:config +qemu-kvm: Failed to load pcie-root-port:parent_obj.parent_obj.parent_obj +qemu-kvm: error while loading state for instance 0x0 of device '0000:00:12.0/pcie-root-port' +qemu-kvm: load of migration failed: Invalid argument + +This happens on pc-q35-6.0 or lower because of: +{ "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" } + +In this scenario, hotplug_handler_plug() calls pcie_cap_slot_plug_cb(), +which sets dev->config byte 0x6e with bit PCI_EXP_SLTSTA_PDS to signal PCI +hotplug for the guest. After a while the guest will deal with this hotplug +and qemu will clear the above bit. + +Then, during migration, get_pci_config_device() will compare the +configs of both the freshly created device and the one that is being +received via migration, which will differ due to the PCI_EXP_SLTSTA_PDS bit +and cause the bug to reproduce. + +To avoid this fake incompatibility, there are three fields in PCIDevice that +can help: + +- wmask: Used to implement R/W bytes, and +- w1cmask: Used to implement RW1C(Write 1 to Clear) bytes +- cmask: Used to enable config checks on load. + +According to PCI Express® Base Specification Revision 5.0 Version 1.0, +table 7-27 (Slot Status Register) bit 6, the "Presence Detect State" is +listed as RO (read-only), so it only makes sense to make use of the cmask +field. + +So, clear PCI_EXP_SLTSTA_PDS bit on cmask, so the fake incompatibility on +get_pci_config_device() does not abort the migration. 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215819 +Signed-off-by: Leonardo Bras +Message-Id: <20230706045546.593605-3-leobras@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Juan Quintela +(cherry picked from commit 625b370c45f4acd155ee625d61c0057d770a5b5e) +Signed-off-by: Leonardo Bras +--- + hw/pci/pcie.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index b8c24cf45f..8bc4a4ee57 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -659,6 +659,10 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) + pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA, + PCI_EXP_HP_EV_SUPPORTED); + ++ /* Avoid migration abortion when this device hot-removed by guest */ ++ pci_word_test_and_clear_mask(dev->cmask + pos + PCI_EXP_SLTSTA, ++ PCI_EXP_SLTSTA_PDS); ++ + dev->exp.hpev_notified = false; + + qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))), +-- +2.39.3 + diff --git a/SOURCES/kvm-physmem-add-missing-memory-barrier.patch b/SOURCES/kvm-physmem-add-missing-memory-barrier.patch deleted file mode 100644 index 3eafa78..0000000 --- a/SOURCES/kvm-physmem-add-missing-memory-barrier.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 0dd4be411e35f00d006d89a15d9161f5d8783c1d Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 10/12] physmem: add missing memory barrier - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [7/9] ee4875cb8c564f0510e48b00a5d95c0e6ea6301b (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 33828ca11da08436e1b32f3e79dabce3061a0427 -Author: Paolo Bonzini -Date: Fri Mar 3 14:36:32 2023 +0100 - - physmem: add missing memory barrier - - 
Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - softmmu/physmem.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/softmmu/physmem.c b/softmmu/physmem.c -index 1b606a3002..772c9896cd 100644 ---- a/softmmu/physmem.c -+++ b/softmmu/physmem.c -@@ -3117,6 +3117,8 @@ void cpu_register_map_client(QEMUBH *bh) - qemu_mutex_lock(&map_client_list_lock); - client->bh = bh; - QLIST_INSERT_HEAD(&map_client_list, client, link); -+ /* Write map_client_list before reading in_use. */ -+ smp_mb(); - if (!qatomic_read(&bounce.in_use)) { - cpu_notify_map_clients_locked(); - } -@@ -3309,6 +3311,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, - qemu_vfree(bounce.buffer); - bounce.buffer = NULL; - memory_region_unref(bounce.mr); -+ /* Clear in_use before reading map_client_list. */ - qatomic_mb_set(&bounce.in_use, false); - cpu_notify_map_clients(); - } --- -2.39.1 - diff --git a/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch b/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch new file mode 100644 index 0000000..0421e33 --- /dev/null +++ b/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch @@ -0,0 +1,42 @@ +From ab9b8620c62540f3267d005c198920671ef9abc3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 3 Mar 2023 11:15:28 +0100 +Subject: [PATCH 06/56] postcopy-ram: do not use qatomic_mb_read +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [5/50] 534c0e13362dfc994fa90c79bfb5ed6ee8c27dfc (peterx/qemu-kvm) + +It does not even pair with a qatomic_mb_set(), so it is clearer to use +load-acquire in this case; they are synonyms. 
+ +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4592eaf38755a28300d113cd128f65b5b38495f2) +Signed-off-by: Peter Xu +--- + migration/postcopy-ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index bbb8af61ae..d7b48dd920 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -1526,7 +1526,7 @@ static PostcopyState incoming_postcopy_state; + + PostcopyState postcopy_state_get(void) + { +- return qatomic_mb_read(&incoming_postcopy_state); ++ return qatomic_load_acquire(&incoming_postcopy_state); + } + + /* Set the state and return the old state */ +-- +2.39.1 + diff --git a/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch b/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch new file mode 100644 index 0000000..abaadf8 --- /dev/null +++ b/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch @@ -0,0 +1,79 @@ +From 99f27e14856c528f442b628e8f4a7881e6e63179 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 30 May 2023 09:19:41 +0200 +Subject: [PATCH 4/5] qapi: add '@fdset' feature for + BlockdevOptionsVirtioBlkVhostVdpa + +RH-Author: Stefano Garzarella +RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver +RH-Bugzilla: 2180076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/2] abee2a542e41f9eaa17dd204b74778e232d1eb60 (sgarzarella/qemu-kvm-c-9-s) + +The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports the fd +passing through the new 'fd' property. + +Since now we are using qemu_open() on '@path' if the virtio-blk driver +supports the fd passing, let's announce it. +In this way, the management layer can pass the file descriptor of an +already opened vhost-vdpa character device. This is useful especially +when the device can only be accessed with certain privileges. 
+ +Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver +in libblkio supports it. + +Suggested-by: Markus Armbruster +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Stefano Garzarella +Message-id: 20230530071941.8954-3-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 98b126f5e3228a346c774e569e26689943b401dd) +- changed doc indentation since QAPI parser failed downstream because + we don't have commit 08349786c84306863a3b659c8a9b28bb74c405c6 + downstream. It relaxed the indentation rules. +Signed-off-by: Stefano Garzarella +--- + meson.build | 4 ++++ + qapi/block-core.json | 6 ++++++ + 2 files changed, 10 insertions(+) + +diff --git a/meson.build b/meson.build +index d964e741e7..a18cc64531 100644 +--- a/meson.build ++++ b/meson.build +@@ -1843,6 +1843,10 @@ config_host_data.set('CONFIG_LZO', lzo.found()) + config_host_data.set('CONFIG_MPATH', mpathpersist.found()) + config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) + config_host_data.set('CONFIG_BLKIO', blkio.found()) ++if blkio.found() ++ config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', ++ blkio.version().version_compare('>=1.3.0')) ++endif + config_host_data.set('CONFIG_CURL', curl.found()) + config_host_data.set('CONFIG_CURSES', curses.found()) + config_host_data.set('CONFIG_GBM', gbm.found()) +diff --git a/qapi/block-core.json b/qapi/block-core.json +index c05ad0c07e..81b48a8d3b 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -3841,10 +3841,16 @@ + # + # @path: path to the vhost-vdpa character device. 
+ # ++# Features: ++# @fdset: Member @path supports the special "/dev/fdset/N" path ++# (since 8.1) ++# + # Since: 7.2 + ## + { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', + 'data': { 'path': 'str' }, ++ 'features': [ { 'name' :'fdset', ++ 'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ], + 'if': 'CONFIG_BLKIO' } + + ## +-- +2.39.3 + diff --git a/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch b/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch new file mode 100644 index 0000000..a95895b --- /dev/null +++ b/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch @@ -0,0 +1,50 @@ +From cbf9c74ef46d71c015b9de53f4514941dca8a035 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Thu, 3 Aug 2023 14:19:37 -0400 +Subject: [PATCH 10/14] qapi, i386/sev: Change the reduced-phys-bits value from + 5 to 1 + +RH-Author: Bandan Das +RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter +RH-Bugzilla: 2214839 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 4243578db33f89461e60b745eb96fee402218c9f (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 + +commit 798a818f50a9bfc01e8b5943090de458863b897b +Author: Tom Lendacky +Date: Fri Sep 30 10:14:27 2022 -0500 + + qapi, i386/sev: Change the reduced-phys-bits value from 5 to 1 + + A guest only ever experiences, at most, 1 bit of reduced physical + addressing. Change the query-sev-capabilities json comment to use 1. + + Fixes: 31dd67f684 ("sev/i386: qmp: add query-sev-capabilities command") + Signed-off-by: Tom Lendacky + Reviewed-by: Dr. 
David Alan Gilbert + Message-Id: + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + qapi/misc-target.json | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index de91054523..bf04042f45 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -172,7 +172,7 @@ + # -> { "execute": "query-sev-capabilities" } + # <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE", + # "cpu0-id": "2lvmGwo+...61iEinw==", +-# "cbitpos": 47, "reduced-phys-bits": 5}} ++# "cbitpos": 47, "reduced-phys-bits": 1}} + # + ## + { 'command': 'query-sev-capabilities', 'returns': 'SevCapability', +-- +2.39.3 + diff --git a/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch b/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch deleted file mode 100644 index acc8c7d..0000000 --- a/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 1fdc864f9ac927f3ea407f35f6771a4b2e8f509f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 04/12] qatomic: add smp_mb__before/after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [1/9] e8d0b64670bff778d275b1fb477dcee0c109251a (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176 -Author: Paolo Bonzini -Date: Thu Mar 2 11:10:56 2023 +0100 - - qatomic: add smp_mb__before/after_rmw() - - On ARM, seqcst loads and stores (which QEMU does not use) are compiled - respectively as LDAR and STLR instructions. Even though LDAR is - also used for load-acquire operations, it also waits for all STLRs to - leave the store buffer. 
Thus, LDAR and STLR alone are load-acquire - and store-release operations, but LDAR also provides store-against-load - ordering as long as the previous store is a STLR. - - Compare this to ARMv7, where store-release is DMB+STR and load-acquire - is LDR+DMB, but an additional DMB is needed between store-seqcst and - load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides - load-acquire and store-release semantics and the two can be reordered. - - Likewise, on ARM sequentially consistent read-modify-write operations only - need to use LDAXR and STLXR respectively for the load and the store, while - on x86 they need to use the stronger LOCK prefix. - - In a strange twist of events, however, the _stronger_ semantics - of the ARM instructions can end up causing bugs on ARM, not on x86. - The problems occur when seqcst atomics are mixed with relaxed atomics. - - QEMU's atomics try to bridge the Linux API (that most of the developers - are familiar with) and the C11 API, and the two have a substantial - difference: - - - in Linux, strongly-ordered atomics such as atomic_add_return() affect - the global ordering of _all_ memory operations, including for example - READ_ONCE()/WRITE_ONCE() - - - in C11, sequentially consistent atomics (except for seq-cst fences) - only affect the ordering of sequentially consistent operations. - In particular, since relaxed loads are done with LDR on ARM, they are - not ordered against seqcst stores (which are done with STLR). - - QEMU implements high-level synchronization primitives with the idea that - the primitives contain the necessary memory barriers, and the callers can - use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses. - This is very much incompatible with the C11 view that seqcst accesses - are only ordered against other seqcst accesses, and requires using seqcst - fences as in the following example: - - qatomic_set(&y, 1); qatomic_set(&x, 1); - smp_mb(); smp_mb(); - ... qatomic_read(&x) ... 
... qatomic_read(&y) ... - - When a qatomic_*() read-modify write operation is used instead of one - or both stores, developers that are more familiar with the Linux API may - be tempted to omit the smp_mb(), which will work on x86 but not on ARM. - - This nasty difference between Linux and C11 read-modify-write operations - has already caused issues in util/async.c and more are being found. - Provide something similar to Linux smp_mb__before/after_atomic(); this - has the double function of documenting clearly why there is a memory - barrier, and avoiding a double barrier on x86 and s390x systems. - - The new macro can already be put to use in qatomic_mb_set(). - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - docs/devel/atomics.rst | 26 +++++++++++++++++++++----- - include/qemu/atomic.h | 17 ++++++++++++++++- - 2 files changed, 37 insertions(+), 6 deletions(-) - -diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst -index 52baa0736d..10fbfc58bb 100644 ---- a/docs/devel/atomics.rst -+++ b/docs/devel/atomics.rst -@@ -25,7 +25,8 @@ provides macros that fall in three camps: - - - weak atomic access and manual memory barriers: ``qatomic_read()``, - ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, -- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``; -+ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``, -+ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``; - - - sequentially consistent atomic access: everything else. - -@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU: - sequential consistency. - - - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in -- the total ordering enforced by sequentially-consistent operations. -+ the ordering enforced by read-modify-write operations. - This is because QEMU uses the C11 memory model. 
The following example - is correct in Linux but not in QEMU: - -@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU: - because the read of ``y`` can be moved (by either the processor or the - compiler) before the write of ``x``. - -- Fixing this requires an ``smp_mb()`` memory barrier between the write -- of ``x`` and the read of ``y``. In the common case where only one thread -- writes ``x``, it is also possible to write it like this: -+ Fixing this requires a full memory barrier between the write of ``x`` and -+ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and -+ ``smp_mb__after_rmw()``; they act both as an optimization, -+ avoiding the memory barrier on processors where it is unnecessary, -+ and as a clarification of this corner case of the C11 memory model: -+ -+ +--------------------------------+ -+ | QEMU (correct) | -+ +================================+ -+ | :: | -+ | | -+ | a = qatomic_fetch_add(&x, 2);| -+ | smp_mb__after_rmw(); | -+ | b = qatomic_read(&y); | -+ +--------------------------------+ -+ -+ In the common case where only one thread writes ``x``, it is also possible -+ to write it like this: - - +--------------------------------+ - | QEMU (correct) | -diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h -index 874134fd19..f85834ee8b 100644 ---- a/include/qemu/atomic.h -+++ b/include/qemu/atomic.h -@@ -245,6 +245,20 @@ - #define smp_wmb() smp_mb_release() - #define smp_rmb() smp_mb_acquire() - -+/* -+ * SEQ_CST is weaker than the older __sync_* builtins and Linux -+ * kernel read-modify-write atomics. Provide a macro to obtain -+ * the same semantics. 
-+ */ -+#if !defined(QEMU_SANITIZE_THREAD) && \ -+ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) -+# define smp_mb__before_rmw() signal_barrier() -+# define smp_mb__after_rmw() signal_barrier() -+#else -+# define smp_mb__before_rmw() smp_mb() -+# define smp_mb__after_rmw() smp_mb() -+#endif -+ - /* qatomic_mb_read/set semantics map Java volatile variables. They are - * less expensive on some platforms (notably POWER) than fully - * sequentially consistent operations. -@@ -259,7 +273,8 @@ - #if !defined(QEMU_SANITIZE_THREAD) && \ - (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) - /* This is more efficient than a store plus a fence. */ --# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) -+# define qatomic_mb_set(ptr, i) \ -+ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); }) - #else - # define qatomic_mb_set(ptr, i) \ - ({ qatomic_store_release(ptr, i); smp_mb(); }) --- -2.39.1 - diff --git a/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch b/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch deleted file mode 100644 index 7f39f4a..0000000 --- a/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 46ead2c391924b68741d6da28f28f909b80f5914 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:51 +0100 -Subject: [PATCH 01/20] qcow2: Fix theoretical corruption in store_bitmap() - error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [1/4] a6a497947179431567d330d0501247a3749fb9fd (kmwolf/centos-qemu-kvm) - -In order to write the bitmap table to the image file, it is converted to -big endian. 
If the write fails, it is passed to clear_bitmap_table() to -free all of the clusters it had allocated before. However, if we don't -convert it back to native endianness first, we'll free things at a wrong -offset. - -In practical terms, the offsets will be so high that we won't actually -free any allocated clusters, but just run into an error, but in theory -this can cause image corruption. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-2-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382) -Signed-off-by: Kevin Wolf ---- - block/qcow2-bitmap.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c -index bcad567c0c..3dff99ba06 100644 ---- a/block/qcow2-bitmap.c -+++ b/block/qcow2-bitmap.c -@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs) - return bdrv_flush(bs->file->bs); - } - --static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) -+static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size) - { - size_t i; - -@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) - goto fail; - } - -- bitmap_table_to_be(tb, tb_size); -+ bitmap_table_bswap_be(tb, tb_size); - ret = bdrv_pwrite(bs->file, tb_offset, tb_size * sizeof(tb[0]), tb, 0); - if (ret < 0) { -+ bitmap_table_bswap_be(tb, tb_size); - error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", - bm_name); - goto fail; --- -2.31.1 - diff --git a/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch b/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch deleted file mode 100644 index d2dacbc..0000000 --- a/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch +++ /dev/null @@ -1,84 +0,0 @@ -From f628a08d20b9ab6be24c2ab18b38a934a314c78b Mon Sep 
17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:56 +0100 -Subject: [PATCH 14/31] qed: Don't yield in bdrv_qed_co_drain_begin() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [2/16] f18e9aebb7e04a62e309b656bac8f2ab83df657f (sgarzarella/qemu-kvm-c-9-s) - -We want to change .bdrv_co_drained_begin() back to be a non-coroutine -callback, so in preparation, avoid yielding in its implementation. - -Because we increase bs->in_flight and bdrv_drained_begin() polls, the -behaviour is unchanged. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 6d47eb0c8bf2d50682c7dccae74d24104076fe23) -Signed-off-by: Stefano Garzarella ---- - block/qed.c | 20 +++++++++++++++++--- - 1 file changed, 17 insertions(+), 3 deletions(-) - -diff --git a/block/qed.c b/block/qed.c -index 2f36ad342c..013f826c44 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s) - qemu_co_mutex_unlock(&s->table_lock); - } - --static void coroutine_fn qed_need_check_timer_entry(void *opaque) -+static void coroutine_fn qed_need_check_timer(BDRVQEDState *s) - { -- BDRVQEDState *s = opaque; - int ret; - - trace_qed_need_check_timer_cb(s); -@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque) - (void) ret; - } - -+static void coroutine_fn qed_need_check_timer_entry(void *opaque) -+{ -+ BDRVQEDState *s = opaque; -+ -+ qed_need_check_timer(opaque); -+ bdrv_dec_in_flight(s->bs); -+} -+ - static void qed_need_check_timer_cb(void *opaque) - { -+ BDRVQEDState *s = opaque; - Coroutine *co = 
qemu_coroutine_create(qed_need_check_timer_entry, opaque); -+ -+ bdrv_inc_in_flight(s->bs); - qemu_coroutine_enter(co); - } - -@@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) - * header is flushed. - */ - if (s->need_check_timer && timer_pending(s->need_check_timer)) { -+ Coroutine *co; -+ - qed_cancel_need_check_timer(s); -- qed_need_check_timer_entry(s); -+ co = qemu_coroutine_create(qed_need_check_timer_entry, s); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch b/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch deleted file mode 100644 index 86e94db..0000000 --- a/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 7a9907c65e3e2bbb0c119acdbbeb4381e7f1d902 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 09/12] qemu-coroutine-lock: add smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [6/9] 4b1723b1ad670ec4c85240390b4fc15ff361154f (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit e3a3b6ec8169eab2feb241b4982585001512cd55 -Author: Paolo Bonzini -Date: Fri Mar 3 10:52:59 2023 +0100 - - qemu-coroutine-lock: add smp_mb__after_rmw() - - mutex->from_push and mutex->handoff in qemu-coroutine-lock implement - the familiar pattern: - - write a write b - smp_mb() smp_mb() - read b read a - - The memory barrier is required by the C memory model even after a - SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC. - Add it and avoid the unclear qatomic_mb_read() operation. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-coroutine-lock.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c -index 45c6b57374..c5897bd963 100644 ---- a/util/qemu-coroutine-lock.c -+++ b/util/qemu-coroutine-lock.c -@@ -202,10 +202,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, - trace_qemu_co_mutex_lock_entry(mutex, self); - push_waiter(mutex, &w); - -+ /* -+ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set -+ * in qemu_co_mutex_unlock. -+ */ -+ smp_mb__after_rmw(); -+ - /* This is the "Responsibility Hand-Off" protocol; a lock() picks from - * a concurrent unlock() the responsibility of waking somebody up. - */ -- old_handoff = qatomic_mb_read(&mutex->handoff); -+ old_handoff = qatomic_read(&mutex->handoff); - if (old_handoff && - has_waiters(mutex) && - qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) { -@@ -304,6 +310,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) - } - - our_handoff = mutex->sequence; -+ /* Set handoff before checking for waiters. 
*/ - qatomic_mb_set(&mutex->handoff, our_handoff); - if (!has_waiters(mutex)) { - /* The concurrent lock has not added itself yet, so it --- -2.39.1 - diff --git a/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch b/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch deleted file mode 100644 index eff4d2e..0000000 --- a/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch +++ /dev/null @@ -1,197 +0,0 @@ -From b1970c733dc46b2a8f648997a7e1c5d12900ff54 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:04 +0200 -Subject: [PATCH 17/20] qemu-img: Change info key names for protocol nodes - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [12/12] 67c260aaa05466410503fecee6210bf9d47e8c7c (hreitz/qemu-kvm-c-9-s) - -Currently, when querying a qcow2 image, qemu-img info reports something -like this: - -image: test.qcow2 -file format: qcow2 -virtual size: 64 MiB (67108864 bytes) -disk size: 196 KiB -cluster_size: 65536 -Format specific information: - compat: 1.1 - compression type: zlib - lazy refcounts: false - refcount bits: 16 - corrupt: false - extended l2: false -Child node '/file': - image: test.qcow2 - file format: file - virtual size: 192 KiB (197120 bytes) - disk size: 196 KiB - Format specific information: - extent size hint: 1048576 - -Notably, the way the keys are named is specific for image files: The -filename is shown under "image", the BDS driver under "file format", and -the BDS length under "virtual size". This does not make much sense for -nodes that are not actually supposed to be guest images, like the /file -child node shown above. 
- -Give bdrv_node_info_dump() a @protocol parameter that gives a hint that -the respective node is probably just used for data storage and does not -necessarily present the data for a VM guest disk. This renames the keys -so that with this patch, the output becomes: - -image: test.qcow2 -[...] -Child node '/file': - filename: test.qcow2 - protocol type: file - file length: 192 KiB (197120 bytes) - disk size: 196 KiB - Format specific information: - extent size hint: 1048576 - -(Perhaps we should also rename "Format specific information", but I -could not come up with anything better that will not become problematic -if we guess wrong with the protocol "heuristic".) - -This change affects iotest 302, which has protocol node information in -its reference output. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-13-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit d570177b50c389f379f93183155a27d44856ab46) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 39 ++++++++++++++++++++++++++++------ - include/block/qapi.h | 2 +- - qemu-img.c | 3 ++- - tests/qemu-iotests/302.out | 6 +++--- - 5 files changed, 39 insertions(+), 13 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index 72824d4e2e..4d83339a5d 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0, false); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index 3e35603f0c..56f398c500 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -934,24 +934,49 @@ void 
bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - visit_free(v); - } - --void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) -+/** -+ * Print the given @info object in human-readable form. Every field is indented -+ * using the given @indentation (four spaces per indentation level). -+ * -+ * When using this to print a whole block graph, @protocol can be set to true to -+ * signify that the given information is associated with a protocol node, i.e. -+ * just data storage for an image, such that the data it presents is not really -+ * a full VM disk. If so, several fields change name: For example, "virtual -+ * size" is printed as "file length". -+ * (Consider a qcow2 image, which is represented by a qcow2 node and a file -+ * node. Printing a "virtual size" for the file node does not make sense, -+ * because without the qcow2 node, it is not really a guest disk, so it does not -+ * have a "virtual size". Therefore, we call it "file length" instead.) -+ * -+ * @protocol is ignored when @indentation is 0, because we take that to mean -+ * that the associated node is the root node in the queried block graph, and -+ * thus is always to be interpreted as a standalone guest disk. 
-+ */ -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol) - { - char *size_buf, *dsize_buf; - g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); - -+ if (indentation == 0) { -+ /* Top level, consider this a normal image */ -+ protocol = false; -+ } -+ - if (!info->has_actual_size) { - dsize_buf = g_strdup("unavailable"); - } else { - dsize_buf = size_to_str(info->actual_size); - } - size_buf = size_to_str(info->virtual_size); -- qemu_printf("%simage: %s\n" -- "%sfile format: %s\n" -- "%svirtual size: %s (%" PRId64 " bytes)\n" -+ qemu_printf("%s%s: %s\n" -+ "%s%s: %s\n" -+ "%s%s: %s (%" PRId64 " bytes)\n" - "%sdisk size: %s\n", -- ind_s, info->filename, -- ind_s, info->format, -- ind_s, size_buf, info->virtual_size, -+ ind_s, protocol ? "filename" : "image", info->filename, -+ ind_s, protocol ? "protocol type" : "file format", -+ info->format, -+ ind_s, protocol ? "file length" : "virtual size", -+ size_buf, info->virtual_size, - ind_s, dsize_buf); - g_free(size_buf); - g_free(dsize_buf); -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 38855f2ae9..26113da21a 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -51,5 +51,5 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - const char *prefix, - int indentation); --void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol); - #endif -diff --git a/qemu-img.c b/qemu-img.c -index e281011245..2943625c67 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2853,7 +2853,8 @@ static void dump_human_image_info(BlockGraphInfo *info, int indentation, - { - BlockChildInfoList *children_list; - -- bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); -+ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation, -+ info->children == NULL); - - for (children_list = info->children; 
children_list; - children_list = children_list->next) -diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out -index edfa1c4f05..7b5014cdd8 100644 ---- a/tests/qemu-iotests/302.out -+++ b/tests/qemu-iotests/302.out -@@ -5,9 +5,9 @@ file format: raw - virtual size: 448 KiB (458752 bytes) - disk size: unavailable - Child node '/file': -- image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -- file format: nbd -- virtual size: 448 KiB (458752 bytes) -+ filename: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -+ protocol type: nbd -+ file length: 448 KiB (458752 bytes) - disk size: unavailable - - === Converted image info === --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch b/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch deleted file mode 100644 index 536df69..0000000 --- a/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch +++ /dev/null @@ -1,261 +0,0 @@ -From ea73e9de42b446ce1049805c23f7706e4f87ed1f Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:03 +0200 -Subject: [PATCH 16/20] qemu-img: Let info print block graph - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [11/12] 2c1b8a03c918484449e876acf4c6663766848ad8 (hreitz/qemu-kvm-c-9-s) - -For every node in the backing chain, collect its BlockGraphInfo struct -using bdrv_query_block_graph_info(). Print all nodes' information, -indenting child nodes and labelling them with a path constructed from -the child names leading to the node from the root (e.g. /file/file). - -Note that we open each image with BDRV_O_NO_BACKING, so its backing -child is omitted from this graph, and thus presented in the previous -manner: By simply concatenating all images' information, separated with -blank lines. 
- -This affects two iotests: -- 065: Here we try to get the format node's format specific information. - The pre-patch code does so by taking all lines from "Format specific - information:" until an empty line. This format specific information - is no longer followed by an empty line, though, but by child node - information, so limit the range by "Child node '/file':". -- 302: Calls qemu_img() for qemu-img info directly, which does not - filter the output, so the child node information ends up in the - output. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-12-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit c04d0ab026201d21873a63f768cb69c4554dfec1) -Signed-off-by: Hanna Czenczek ---- - qapi/block-core.json | 4 +-- - qemu-img.c | 69 ++++++++++++++++++++++++++------------ - tests/qemu-iotests/065 | 2 +- - tests/qemu-iotests/302.out | 5 +++ - 4 files changed, 56 insertions(+), 24 deletions(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index d703e0fb16..7f331eb8ea 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -5831,9 +5831,9 @@ - ## - # @DummyBlockCoreForceArrays: - # --# Not used by QMP; hack to let us use BlockNodeInfoList internally -+# Not used by QMP; hack to let us use BlockGraphInfoList internally - # - # Since: 8.0 - ## - { 'struct': 'DummyBlockCoreForceArrays', -- 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } -+ 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } -diff --git a/qemu-img.c b/qemu-img.c -index 30b4ea58bb..e281011245 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) - g_free(sn_tab); - } - --static void dump_json_block_node_info_list(BlockNodeInfoList *list) -+static void dump_json_block_graph_info_list(BlockGraphInfoList *list) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_BlockNodeInfoList(v, NULL, 
&list, &error_abort); -+ visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2832,13 +2832,13 @@ static void dump_json_block_node_info_list(BlockNodeInfoList *list) - g_string_free(str, true); - } - --static void dump_json_block_node_info(BlockNodeInfo *info) -+static void dump_json_block_graph_info(BlockGraphInfo *info) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); -+ visit_type_BlockGraphInfo(v, NULL, &info, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2848,9 +2848,29 @@ static void dump_json_block_node_info(BlockNodeInfo *info) - g_string_free(str, true); - } - --static void dump_human_image_info_list(BlockNodeInfoList *list) -+static void dump_human_image_info(BlockGraphInfo *info, int indentation, -+ const char *path) - { -- BlockNodeInfoList *elem; -+ BlockChildInfoList *children_list; -+ -+ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); -+ -+ for (children_list = info->children; children_list; -+ children_list = children_list->next) -+ { -+ BlockChildInfo *child = children_list->value; -+ g_autofree char *child_path = NULL; -+ -+ printf("%*sChild node '%s%s':\n", -+ indentation * 4, "", path, child->name); -+ child_path = g_strdup_printf("%s%s/", path, child->name); -+ dump_human_image_info(child->info, indentation + 1, child_path); -+ } -+} -+ -+static void dump_human_image_info_list(BlockGraphInfoList *list) -+{ -+ BlockGraphInfoList *elem; - bool delim = false; - - for (elem = list; elem; elem = elem->next) { -@@ -2859,7 +2879,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list) - } - delim = true; - -- bdrv_node_info_dump(elem->value, 0); -+ dump_human_image_info(elem->value, 0, "/"); - } - } - -@@ -2869,7 +2889,7 @@ static gboolean 
str_equal_func(gconstpointer a, gconstpointer b) - } - - /** -- * Open an image file chain and return an BlockNodeInfoList -+ * Open an image file chain and return an BlockGraphInfoList - * - * @filename: topmost image filename - * @fmt: topmost image format (may be NULL to autodetect) -@@ -2880,13 +2900,13 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) - * opening an image file. If there was an error a message will have been - * printed to stderr. - */ --static BlockNodeInfoList *collect_image_info_list(bool image_opts, -- const char *filename, -- const char *fmt, -- bool chain, bool force_share) -+static BlockGraphInfoList *collect_image_info_list(bool image_opts, -+ const char *filename, -+ const char *fmt, -+ bool chain, bool force_share) - { -- BlockNodeInfoList *head = NULL; -- BlockNodeInfoList **tail = &head; -+ BlockGraphInfoList *head = NULL; -+ BlockGraphInfoList **tail = &head; - GHashTable *filenames; - Error *err = NULL; - -@@ -2895,7 +2915,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - while (filename) { - BlockBackend *blk; - BlockDriverState *bs; -- BlockNodeInfo *info; -+ BlockGraphInfo *info; - - if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { - error_report("Backing file '%s' creates an infinite loop.", -@@ -2912,7 +2932,14 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - } - bs = blk_bs(blk); - -- bdrv_query_block_node_info(bs, &info, &err); -+ /* -+ * Note that the returned BlockGraphInfo object will not have -+ * information about this image's backing node, because we have opened -+ * it with BDRV_O_NO_BACKING. Printing this object will therefore not -+ * duplicate the backing chain information that we obtain by walking -+ * the chain manually here. 
-+ */ -+ bdrv_query_block_graph_info(bs, &info, &err); - if (err) { - error_report_err(err); - blk_unref(blk); -@@ -2945,7 +2972,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - return head; - - err: -- qapi_free_BlockNodeInfoList(head); -+ qapi_free_BlockGraphInfoList(head); - g_hash_table_destroy(filenames); - return NULL; - } -@@ -2956,7 +2983,7 @@ static int img_info(int argc, char **argv) - OutputFormat output_format = OFORMAT_HUMAN; - bool chain = false; - const char *filename, *fmt, *output; -- BlockNodeInfoList *list; -+ BlockGraphInfoList *list; - bool image_opts = false; - bool force_share = false; - -@@ -3035,14 +3062,14 @@ static int img_info(int argc, char **argv) - break; - case OFORMAT_JSON: - if (chain) { -- dump_json_block_node_info_list(list); -+ dump_json_block_graph_info_list(list); - } else { -- dump_json_block_node_info(list->value); -+ dump_json_block_graph_info(list->value); - } - break; - } - -- qapi_free_BlockNodeInfoList(list); -+ qapi_free_BlockGraphInfoList(list); - return 0; - } - -diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 -index b724c89c7c..b76701c71e 100755 ---- a/tests/qemu-iotests/065 -+++ b/tests/qemu-iotests/065 -@@ -56,7 +56,7 @@ class TestQemuImgInfo(TestImageInfoSpecific): - def test_human(self): - data = qemu_img('info', '--output=human', test_img).stdout.split('\n') - data = data[(data.index('Format specific information:') + 1) -- :data.index('')] -+ :data.index("Child node '/file':")] - for field in data: - self.assertTrue(re.match('^ {4}[^ ]', field) is not None) - data = [line.strip() for line in data] -diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out -index 3e7c281b91..edfa1c4f05 100644 ---- a/tests/qemu-iotests/302.out -+++ b/tests/qemu-iotests/302.out -@@ -4,6 +4,11 @@ image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock - file format: raw - virtual size: 448 KiB (458752 bytes) - disk size: unavailable -+Child node '/file': -+ image: 
nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -+ file format: nbd -+ virtual size: 448 KiB (458752 bytes) -+ disk size: unavailable - - === Converted image info === - image: TEST_IMG --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch b/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch deleted file mode 100644 index 7bfb7e6..0000000 --- a/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch +++ /dev/null @@ -1,241 +0,0 @@ -From dca4cbe680baff837ca8ac8bd39b77b46af3f64b Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:57 +0200 -Subject: [PATCH 10/20] qemu-img: Use BlockNodeInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [5/12] b599af3ec05951a0ba11d9eae2ee19148d6bf624 (hreitz/qemu-kvm-c-9-s) - -qemu-img info never uses ImageInfo's backing-image field, because it -opens the backing chain one by one with BDRV_O_NO_BACKING, and prints -all backing chain nodes' information consecutively. Use BlockNodeInfo -to make it clear that we only print information about a single node, and -that we are not using the backing-image field. - -Notably, bdrv_image_info_dump() does not evaluate the backing-image -field, so we can easily make it take a BlockNodeInfo pointer (and -consequentially rename it to bdrv_node_info_dump()). It makes more -sense this way, because again, the interface now makes it syntactically -clear that backing-image is ignored by this function. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-6-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit b1f4cd1589a16fec02f264a09bd3560e4ccce3c2) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 2 +- - include/block/qapi.h | 2 +- - qapi/block-core.json | 4 +-- - qemu-img.c | 48 +++++++++++++++++----------------- - 5 files changed, 29 insertions(+), 29 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index b6135e9bfe..aa37faa601 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_image_info_dump(image_info); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index e5022b4481..ad88bf9b38 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -865,7 +865,7 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - visit_free(v); - } - --void bdrv_image_info_dump(ImageInfo *info) -+void bdrv_node_info_dump(BlockNodeInfo *info) - { - char *size_buf, *dsize_buf; - if (!info->has_actual_size) { -diff --git a/include/block/qapi.h b/include/block/qapi.h -index c7de4e3fa9..22198dcd0c 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -45,5 +45,5 @@ void bdrv_query_image_info(BlockDriverState *bs, - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - const char *prefix); --void bdrv_image_info_dump(ImageInfo *info); -+void bdrv_node_info_dump(BlockNodeInfo *info); - #endif -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 7720da0498..4cf2deeb6c 100644 ---- a/qapi/block-core.json -+++ 
b/qapi/block-core.json -@@ -5796,9 +5796,9 @@ - ## - # @DummyBlockCoreForceArrays: - # --# Not used by QMP; hack to let us use ImageInfoList internally -+# Not used by QMP; hack to let us use BlockNodeInfoList internally - # - # Since: 8.0 - ## - { 'struct': 'DummyBlockCoreForceArrays', -- 'data': { 'unused-image-info': ['ImageInfo'] } } -+ 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } -diff --git a/qemu-img.c b/qemu-img.c -index 2f85bb7ede..3b2ca3bbcb 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) - g_free(sn_tab); - } - --static void dump_json_image_info_list(ImageInfoList *list) -+static void dump_json_block_node_info_list(BlockNodeInfoList *list) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_ImageInfoList(v, NULL, &list, &error_abort); -+ visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2832,13 +2832,13 @@ static void dump_json_image_info_list(ImageInfoList *list) - g_string_free(str, true); - } - --static void dump_json_image_info(ImageInfo *info) -+static void dump_json_block_node_info(BlockNodeInfo *info) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_ImageInfo(v, NULL, &info, &error_abort); -+ visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2848,9 +2848,9 @@ static void dump_json_image_info(ImageInfo *info) - g_string_free(str, true); - } - --static void dump_human_image_info_list(ImageInfoList *list) -+static void dump_human_image_info_list(BlockNodeInfoList *list) - { -- ImageInfoList *elem; -+ BlockNodeInfoList *elem; - bool delim = false; - - for (elem = list; elem; elem = elem->next) { -@@ -2859,7 +2859,7 @@ static void 
dump_human_image_info_list(ImageInfoList *list) - } - delim = true; - -- bdrv_image_info_dump(elem->value); -+ bdrv_node_info_dump(elem->value); - } - } - -@@ -2869,24 +2869,24 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) - } - - /** -- * Open an image file chain and return an ImageInfoList -+ * Open an image file chain and return an BlockNodeInfoList - * - * @filename: topmost image filename - * @fmt: topmost image format (may be NULL to autodetect) - * @chain: true - enumerate entire backing file chain - * false - only topmost image file - * -- * Returns a list of ImageInfo objects or NULL if there was an error opening an -- * image file. If there was an error a message will have been printed to -- * stderr. -+ * Returns a list of BlockNodeInfo objects or NULL if there was an error -+ * opening an image file. If there was an error a message will have been -+ * printed to stderr. - */ --static ImageInfoList *collect_image_info_list(bool image_opts, -- const char *filename, -- const char *fmt, -- bool chain, bool force_share) -+static BlockNodeInfoList *collect_image_info_list(bool image_opts, -+ const char *filename, -+ const char *fmt, -+ bool chain, bool force_share) - { -- ImageInfoList *head = NULL; -- ImageInfoList **tail = &head; -+ BlockNodeInfoList *head = NULL; -+ BlockNodeInfoList **tail = &head; - GHashTable *filenames; - Error *err = NULL; - -@@ -2895,7 +2895,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - while (filename) { - BlockBackend *blk; - BlockDriverState *bs; -- ImageInfo *info; -+ BlockNodeInfo *info; - - if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { - error_report("Backing file '%s' creates an infinite loop.", -@@ -2912,7 +2912,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - } - bs = blk_bs(blk); - -- bdrv_query_image_info(bs, &info, &err); -+ bdrv_query_block_node_info(bs, &info, &err); - if (err) { - error_report_err(err); - blk_unref(blk); -@@ 
-2945,7 +2945,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - return head; - - err: -- qapi_free_ImageInfoList(head); -+ qapi_free_BlockNodeInfoList(head); - g_hash_table_destroy(filenames); - return NULL; - } -@@ -2956,7 +2956,7 @@ static int img_info(int argc, char **argv) - OutputFormat output_format = OFORMAT_HUMAN; - bool chain = false; - const char *filename, *fmt, *output; -- ImageInfoList *list; -+ BlockNodeInfoList *list; - bool image_opts = false; - bool force_share = false; - -@@ -3035,14 +3035,14 @@ static int img_info(int argc, char **argv) - break; - case OFORMAT_JSON: - if (chain) { -- dump_json_image_info_list(list); -+ dump_json_block_node_info_list(list); - } else { -- dump_json_image_info(list->value); -+ dump_json_block_node_info(list->value); - } - break; - } - -- qapi_free_ImageInfoList(list); -+ qapi_free_BlockNodeInfoList(list); - return 0; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch deleted file mode 100644 index 693049c..0000000 --- a/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch +++ /dev/null @@ -1,70 +0,0 @@ -From d0d3d694b3a8d200442484ae0c9d263e0439cd04 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:53 +0100 -Subject: [PATCH 03/20] qemu-img bitmap: Report errors while closing the image -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [3/4] 4a704fec2e3bcb47b2be1529e27fd1833d58c517 (kmwolf/centos-qemu-kvm) - -blk_unref() can't report any errors that happen while closing the image. 
-For example, if qcow2 hits an -ENOSPC error while writing out dirty -bitmaps when it's closed, it prints error messages to stderr, but -'qemu-img bitmap' won't see any error return value and will therefore -look successful with exit code 0. - -In order to fix this, manually inactivate the image first before calling -blk_unref(). This already performs the operations that would be most -likely to fail while closing the image, but it can still return errors. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330 -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-4-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c) -Signed-off-by: Kevin Wolf ---- - qemu-img.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/qemu-img.c b/qemu-img.c -index 3cbdda9f76..2f85bb7ede 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -4646,6 +4646,7 @@ static int img_bitmap(int argc, char **argv) - QSIMPLEQ_HEAD(, ImgBitmapAction) actions; - ImgBitmapAction *act, *act_next; - const char *op; -+ int inactivate_ret; - - QSIMPLEQ_INIT(&actions); - -@@ -4830,6 +4831,16 @@ static int img_bitmap(int argc, char **argv) - ret = 0; - - out: -+ /* -+ * Manually inactivate the images first because this way we can know whether -+ * an error occurred. blk_unref() doesn't tell us about failures. 
-+ */ -+ inactivate_ret = bdrv_inactivate_all(); -+ if (inactivate_ret < 0) { -+ error_report("Error while closing the image: %s", strerror(-inactivate_ret)); -+ ret = 1; -+ } -+ - blk_unref(src); - blk_unref(blk); - qemu_opts_del(opts); --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch deleted file mode 100644 index 5cac3ba..0000000 --- a/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 2f5369f0effaa23be746f9b5d9f6a0bfc346fb7d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:52 +0100 -Subject: [PATCH 02/20] qemu-img commit: Report errors while closing the image - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [2/4] faedd43355463b1210a3f21ecd430f478bd06f5a (kmwolf/centos-qemu-kvm) - -blk_unref() can't report any errors that happen while closing the image. -For example, if qcow2 hits an -ENOSPC error while writing out dirty -bitmaps when it's closed, it prints error messages to stderr, but -'qemu-img commit' won't see any error return value and will therefore -look successful with exit code 0. - -In order to fix this, manually inactivate the image first before calling -blk_unref(). This already performs the operations that would be most -likely to fail while closing the image, but it can still return errors. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-3-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Signed-off-by: Kevin Wolf -(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3) -Signed-off-by: Kevin Wolf ---- - qemu-img.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/qemu-img.c b/qemu-img.c -index a9b3a8103c..3cbdda9f76 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -449,6 +449,11 @@ static BlockBackend *img_open(bool image_opts, - blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, - force_share); - } -+ -+ if (blk) { -+ blk_set_force_allow_inactivate(blk); -+ } -+ - return blk; - } - -@@ -1119,6 +1124,14 @@ unref_backing: - done: - qemu_progress_end(); - -+ /* -+ * Manually inactivate the image first because this way we can know whether -+ * an error occurred. blk_unref() doesn't tell us about failures. -+ */ -+ ret = bdrv_inactivate_all(); -+ if (ret < 0 && !local_err) { -+ error_setg_errno(&local_err, -ret, "Error while closing the image"); -+ } - blk_unref(blk); - - if (local_err) { --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch b/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch deleted file mode 100644 index 6b88e5c..0000000 --- a/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch +++ /dev/null @@ -1,166 +0,0 @@ -From 06030aa79fcb2d90d6a670e75d959aa0c3204b5c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:54 +0100 -Subject: [PATCH 04/20] qemu-iotests: Test qemu-img bitmap/commit exit code on - error - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [4/4] b96bb671bcfb7ae18015fda14db70f42a83a6ea7 (kmwolf/centos-qemu-kvm) - -This tests that when an error happens while writing back bitmaps to the -image 
file in qcow2_inactivate(), 'qemu-img bitmap/commit' actually -return an error value in their exit code instead of making the operation -look successful to scripts. - -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-5-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Signed-off-by: Kevin Wolf -(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65) -Signed-off-by: Kevin Wolf ---- - .../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++ - .../tests/qemu-img-close-errors.out | 23 +++++ - 2 files changed, 119 insertions(+) - create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors - create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out - -diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors -new file mode 100755 -index 0000000000..50bfb6cfa2 ---- /dev/null -+++ b/tests/qemu-iotests/tests/qemu-img-close-errors -@@ -0,0 +1,96 @@ -+#!/usr/bin/env bash -+# group: rw auto quick -+# -+# Check that errors while closing the image, in particular writing back dirty -+# bitmaps, is correctly reported with a failing qemu-img exit code. -+# -+# Copyright (C) 2023 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=kwolf@redhat.com -+ -+seq="$(basename $0)" -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! 
-+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt qcow2 -+_supported_proto file -+_supported_os Linux -+ -+size=1G -+ -+# The error we are going to use is ENOSPC. Depending on how many bitmaps we -+# create in the backing file (and therefore increase the used up space), we get -+# failures in different places. With a low number, only merging the bitmap -+# fails, whereas with a higher number, already 'qemu-img commit' fails. -+for max_bitmap in 6 7; do -+ echo -+ echo "=== Test with $max_bitmap bitmaps ===" -+ -+ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size -+ for i in $(seq 1 $max_bitmap); do -+ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i" -+ done -+ -+ # Simulate a block device of 128 MB by resizing the image file accordingly -+ # and then enforcing the size with the raw driver -+ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base" -+ BASE_JSON='json:{ -+ "driver": "qcow2", -+ "file": { -+ "driver": "raw", -+ "size": 134217728, -+ "file": { -+ "driver": "file", -+ "filename":"'"$TEST_IMG.base"'" -+ } -+ } -+ }' -+ -+ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT -+ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap" -+ -+ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io -+ -+ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids -+ echo "qemu-img commit exit code: ${PIPESTATUS[0]}" -+ -+ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap" -+ echo "qemu-img bitmap --add exit code: $?" 
-+ -+ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \ -+ "good-bitmap" 2>&1 | _filter_generated_node_ids -+ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}" -+done -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -+ -diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out -new file mode 100644 -index 0000000000..1bfe88f176 ---- /dev/null -+++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out -@@ -0,0 +1,23 @@ -+QA output created by qemu-img-close-errors -+ -+=== Test with 6 bitmaps === -+wrote 132120576/132120576 bytes at offset 0 -+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+Image committed. -+qemu-img commit exit code: 0 -+qemu-img bitmap --add exit code: 0 -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device -+qemu-img: Error while closing the image: Invalid argument -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device -+qemu-img bitmap --merge exit code: 1 -+ -+=== Test with 7 bitmaps === -+wrote 132120576/132120576 bytes at offset 0 -+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device -+qemu-img: Error while closing the image: Invalid argument -+qemu-img commit exit code: 1 -+qemu-img bitmap --add exit code: 0 -+qemu-img bitmap --merge exit code: 0 -+*** done --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch b/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch new file mode 
100644 index 0000000..6830692 --- /dev/null +++ b/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch @@ -0,0 +1,60 @@ +From 50c833fc3c7d8d3a5124cfdb2f2dc06b910c2252 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Thu, 3 Aug 2023 14:21:25 -0400 +Subject: [PATCH 11/14] qemu-options.hx: Update the reduced-phys-bits + documentation + +RH-Author: Bandan Das +RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter +RH-Bugzilla: 2214839 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] b0c4a19e9f4185c97ddf71857bc9367cea01ffa8 (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 + +commit 326e3015c4c6f3197157ea0bb00826ae740e2fad +Author: Tom Lendacky +Date: Fri Sep 30 10:14:28 2022 -0500 + + qemu-options.hx: Update the reduced-phys-bits documentation + + A guest only ever experiences, at most, 1 bit of reduced physical + addressing. Update the documentation to reflect this as well as change + the example value on the reduced-phys-bits option. + + Fixes: a9b4942f48 ("target/i386: add Secure Encrypted Virtualization (SEV) object") + Signed-off-by: Tom Lendacky + Reviewed-by: Dr. David Alan Gilbert + Message-Id: <13a62ced1808546c1d398e2025cf85f4c94ae123.1664550870.git.thomas.lendacky@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + qemu-options.hx | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/qemu-options.hx b/qemu-options.hx +index b18f933703..edf10a5aac 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5417,7 +5417,7 @@ SRST + physical address space. The ``reduced-phys-bits`` is used to + provide the number of bits we loose in physical address space. + Similar to C-bit, the value is Host family dependent. On EPYC, +- the value should be 5. ++ a guest will lose a maximum of 1 bit, so the value should be 1. 
+ + The ``sev-device`` provides the device file to use for + communicating with the SEV firmware running inside AMD Secure +@@ -5452,7 +5452,7 @@ SRST + + # |qemu_system_x86| \\ + ...... \\ +- -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=5 \\ ++ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \\ + -machine ...,memory-encryption=sev0 \\ + ..... + +-- +2.39.3 + diff --git a/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch deleted file mode 100644 index 25f30ff..0000000 --- a/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch +++ /dev/null @@ -1,146 +0,0 @@ -From aa61e4c437d29a791ea09a01f7230231f1e53356 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 05/12] qemu-thread-posix: cleanup, fix, document QemuEvent - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [2/9] c3bdf75f884e137c667316aaac96bb4a0b9ec2d9 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593 -Author: Paolo Bonzini -Date: Thu Mar 2 11:19:52 2023 +0100 - - qemu-thread-posix: cleanup, fix, document QemuEvent - - QemuEvent is currently broken on ARM due to missing memory barriers - after qatomic_*(). Apart from adding the memory barrier, a closer look - reveals some unpaired memory barriers too. Document more clearly what - is going on. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------ - 1 file changed, 49 insertions(+), 20 deletions(-) - -diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c -index bae938c670..cc74f4ede0 100644 ---- a/util/qemu-thread-posix.c -+++ b/util/qemu-thread-posix.c -@@ -379,13 +379,21 @@ void qemu_event_destroy(QemuEvent *ev) - - void qemu_event_set(QemuEvent *ev) - { -- /* qemu_event_set has release semantics, but because it *loads* -+ assert(ev->initialized); -+ -+ /* -+ * Pairs with both qemu_event_reset() and qemu_event_wait(). -+ * -+ * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ -- assert(ev->initialized); - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { -- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { -+ int old = qatomic_xchg(&ev->value, EV_SET); -+ -+ /* Pairs with memory barrier in kernel futex_wait system call. */ -+ smp_mb__after_rmw(); -+ if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - qemu_futex_wake(ev, INT_MAX); - } -@@ -394,18 +402,19 @@ void qemu_event_set(QemuEvent *ev) - - void qemu_event_reset(QemuEvent *ev) - { -- unsigned value; -- - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -- if (value == EV_SET) { -- /* -- * If there was a concurrent reset (or even reset+wait), -- * do nothing. Otherwise change EV_SET->EV_FREE. -- */ -- qatomic_or(&ev->value, EV_FREE); -- } -+ -+ /* -+ * If there was a concurrent reset (or even reset+wait), -+ * do nothing. Otherwise change EV_SET->EV_FREE. -+ */ -+ qatomic_or(&ev->value, EV_FREE); -+ -+ /* -+ * Order reset before checking the condition in the caller. -+ * Pairs with the first memory barrier in qemu_event_set(). 
-+ */ -+ smp_mb__after_rmw(); - } - - void qemu_event_wait(QemuEvent *ev) -@@ -413,20 +422,40 @@ void qemu_event_wait(QemuEvent *ev) - unsigned value; - - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -+ -+ /* -+ * qemu_event_wait must synchronize with qemu_event_set even if it does -+ * not go down the slow path, so this load-acquire is needed that -+ * synchronizes with the first memory barrier in qemu_event_set(). -+ * -+ * If we do go down the slow path, there is no requirement at all: we -+ * might miss a qemu_event_set() here but ultimately the memory barrier in -+ * qemu_futex_wait() will ensure the check is done correctly. -+ */ -+ value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { - /* -- * Leave the event reset and tell qemu_event_set that there -- * are waiters. No need to retry, because there cannot be -- * a concurrent busy->free transition. After the CAS, the -- * event will be either set or busy. -+ * Leave the event reset and tell qemu_event_set that there are -+ * waiters. No need to retry, because there cannot be a concurrent -+ * busy->free transition. After the CAS, the event will be either -+ * set or busy. -+ * -+ * This cmpxchg doesn't have particular ordering requirements if it -+ * succeeds (moving the store earlier can only cause qemu_event_set() -+ * to issue _more_ wakeups), the failing case needs acquire semantics -+ * like the load above. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - return; - } - } -+ -+ /* -+ * This is the final check for a concurrent set, so it does need -+ * a smp_mb() pairing with the second barrier of qemu_event_set(). -+ * The barrier is inside the FUTEX_WAIT system call. 
-+ */ - qemu_futex_wait(ev, EV_BUSY); - } - } --- -2.39.1 - diff --git a/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch deleted file mode 100644 index 631d541..0000000 --- a/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 02347869410fe53d814487501fb586f7dc614375 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 06/12] qemu-thread-win32: cleanup, fix, document QemuEvent - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [3/9] d228e9d6a4a75dd1f0a23a6dceaf4fea23d69192 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4 -Author: Paolo Bonzini -Date: Thu Mar 2 11:22:50 2023 +0100 - - qemu-thread-win32: cleanup, fix, document QemuEvent - - QemuEvent is currently broken on ARM due to missing memory barriers - after qatomic_*(). Apart from adding the memory barrier, a closer look - reveals some unpaired memory barriers that are not really needed and - complicated the functions unnecessarily. Also, it is relying on - a memory barrier in ResetEvent(); the barrier _ought_ to be there - but there is really no documentation about it, so make it explicit. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++------------- - 1 file changed, 56 insertions(+), 26 deletions(-) - -diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c -index 69db254ac7..a7fe3cc345 100644 ---- a/util/qemu-thread-win32.c -+++ b/util/qemu-thread-win32.c -@@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev) - void qemu_event_set(QemuEvent *ev) - { - assert(ev->initialized); -- /* qemu_event_set has release semantics, but because it *loads* -+ -+ /* -+ * Pairs with both qemu_event_reset() and qemu_event_wait(). -+ * -+ * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { -- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { -+ int old = qatomic_xchg(&ev->value, EV_SET); -+ -+ /* Pairs with memory barrier after ResetEvent. */ -+ smp_mb__after_rmw(); -+ if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - SetEvent(ev->event); - } -@@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev) - - void qemu_event_reset(QemuEvent *ev) - { -- unsigned value; -- - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -- if (value == EV_SET) { -- /* If there was a concurrent reset (or even reset+wait), -- * do nothing. Otherwise change EV_SET->EV_FREE. -- */ -- qatomic_or(&ev->value, EV_FREE); -- } -+ -+ /* -+ * If there was a concurrent reset (or even reset+wait), -+ * do nothing. Otherwise change EV_SET->EV_FREE. -+ */ -+ qatomic_or(&ev->value, EV_FREE); -+ -+ /* -+ * Order reset before checking the condition in the caller. -+ * Pairs with the first memory barrier in qemu_event_set(). 
-+ */ -+ smp_mb__after_rmw(); - } - - void qemu_event_wait(QemuEvent *ev) -@@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev) - unsigned value; - - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -+ -+ /* -+ * qemu_event_wait must synchronize with qemu_event_set even if it does -+ * not go down the slow path, so this load-acquire is needed that -+ * synchronizes with the first memory barrier in qemu_event_set(). -+ * -+ * If we do go down the slow path, there is no requirement at all: we -+ * might miss a qemu_event_set() here but ultimately the memory barrier in -+ * qemu_futex_wait() will ensure the check is done correctly. -+ */ -+ value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { -- /* qemu_event_set is not yet going to call SetEvent, but we are -- * going to do another check for EV_SET below when setting EV_BUSY. -- * At that point it is safe to call WaitForSingleObject. -+ /* -+ * Here the underlying kernel event is reset, but qemu_event_set is -+ * not yet going to call SetEvent. However, there will be another -+ * check for EV_SET below when setting EV_BUSY. At that point it -+ * is safe to call WaitForSingleObject. - */ - ResetEvent(ev->event); - -- /* Tell qemu_event_set that there are waiters. No need to retry -- * because there cannot be a concurrent busy->free transition. -- * After the CAS, the event will be either set or busy. -+ /* -+ * It is not clear whether ResetEvent provides this barrier; kernel -+ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! -+ */ -+ smp_mb(); -+ -+ /* -+ * Leave the event reset and tell qemu_event_set that there are -+ * waiters. No need to retry, because there cannot be a concurrent -+ * busy->free transition. After the CAS, the event will be either -+ * set or busy. 
- */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { -- value = EV_SET; -- } else { -- value = EV_BUSY; -+ return; - } - } -- if (value == EV_BUSY) { -- WaitForSingleObject(ev->event, INFINITE); -- } -+ -+ /* -+ * ev->value is now EV_BUSY. Since we didn't observe EV_SET, -+ * qemu_event_set() must observe EV_BUSY and call SetEvent(). -+ */ -+ WaitForSingleObject(ev->event, INFINITE); - } - } - --- -2.39.1 - diff --git a/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch b/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch new file mode 100644 index 0000000..4a4a2cc --- /dev/null +++ b/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch @@ -0,0 +1,54 @@ +From 936e21428a04524ccffeb36110d1aa61de9f44e5 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 11/21] raven: disable reentrancy detection for iomem + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/13] 48278583aa1ab08b912f49cd8b3a79d1bb3abf5f (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:12 2023 -0400 + + raven: disable reentrancy detection for iomem + + As the code is designed for re-entrant calls from raven_io_ops to + pci-conf, mark raven_io_ops as reentrancy-safe. 
+ + Signed-off-by: Alexander Bulekov + Message-Id: <20230427211013.2994127-8-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/pci-host/raven.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c +index 072ffe3c5e..9a11ac4b2b 100644 +--- a/hw/pci-host/raven.c ++++ b/hw/pci-host/raven.c +@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) + memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); + address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + ++ /* ++ * Raven's raven_io_ops use the address-space API to access pci-conf-idx ++ * (which is also owned by the raven device). As such, mark the ++ * pci_io_non_contiguous as re-entrancy safe. ++ */ ++ s->pci_io_non_contiguous.disable_reentrancy_guard = true; ++ + /* CPU address space */ + memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, + &s->pci_io); +-- +2.39.3 + diff --git a/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch b/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch deleted file mode 100644 index 1a2e863..0000000 --- a/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 546e4213c4e8a7b2e369315a71bc9aec091eed6e Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Mon, 19 Dec 2022 10:30:26 +0100 -Subject: redhat: fix virt-rhel9.2.0 compat props - -RH-Author: Cornelia Huck -RH-MergeRequest: 127: redhat: fix virt-rhel9.2.0 compat props -RH-Bugzilla: 2154640 -RH-Acked-by: Eric Auger -RH-Acked-by: Gavin Shan -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 49635fdc1d9a934ece78abd160b07c19909f876a (cohuck/qemu-kvm-c9s) - -We need to include arm_rhel_compat props in the latest machine. 
- -Signed-off-by: Cornelia Huck ---- - hw/arm/virt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 0a94f31dd1..bf18838b87 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3520,6 +3520,7 @@ type_init(rhel_machine_init); - - static void rhel920_virt_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - } - DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) - -@@ -3529,7 +3530,6 @@ static void rhel900_virt_options(MachineClass *mc) - - rhel920_virt_options(mc); - -- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ --- -2.38.1 - diff --git a/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch b/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch new file mode 100644 index 0000000..d0eb303 --- /dev/null +++ b/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch @@ -0,0 +1,220 @@ +From 41987ce0dd79d8734088002cbd34f20704dd017a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 17 Jul 2023 17:36:07 +0200 +Subject: [PATCH 04/12] s390x/ap: Wire up the device request notifier interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 185: Backport s390x fixes from QEMU 8.1 +RH-Jira: RHEL-794 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Commit: [1/1] ab6c912a1b8cdb584adacac16af79352fdfe7355 (clegoate/qemu-kvm-c9s) + +Jira: https://issues.redhat.com/browse/RHEL-794 + +commit 1360b2ad1f673d32a09de5826cd71ecd0510164a +Author: Tony Krowiak +Date: Fri Jun 2 10:11:25 2023 -0400 + + s390x/ap: Wire up the device request notifier interface + + Let's wire up the device request notifier interface to 
handle device unplug + requests for AP. + + Signed-off-by: Tony Krowiak + Link: https://lore.kernel.org/qemu-devel/20230530225544.280031-1-akrowiak@linux.ibm.com/ + Signed-off-by: Cédric Le Goater + +Backport note: + + - linux-headers/linux/vfio.h + updated to v6.5-rc1 level for VFIO_AP_REQ_IRQ_INDEX definition + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/ap.c | 113 +++++++++++++++++++++++++++++++++++++ + linux-headers/linux/vfio.h | 9 +++ + 2 files changed, 122 insertions(+) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index e0dd561e85..6e21d1da5a 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -18,6 +18,8 @@ + #include "hw/vfio/vfio-common.h" + #include "hw/s390x/ap-device.h" + #include "qemu/error-report.h" ++#include "qemu/event_notifier.h" ++#include "qemu/main-loop.h" + #include "qemu/module.h" + #include "qemu/option.h" + #include "qemu/config-file.h" +@@ -33,6 +35,7 @@ + struct VFIOAPDevice { + APDevice apdev; + VFIODevice vdev; ++ EventNotifier req_notifier; + }; + + OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) +@@ -84,10 +87,110 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) + return vfio_get_group(groupid, &address_space_memory, errp); + } + ++static void vfio_ap_req_notifier_handler(void *opaque) ++{ ++ VFIOAPDevice *vapdev = opaque; ++ Error *err = NULL; ++ ++ if (!event_notifier_test_and_clear(&vapdev->req_notifier)) { ++ return; ++ } ++ ++ qdev_unplug(DEVICE(vapdev), &err); ++ ++ if (err) { ++ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); ++ } ++} ++ ++static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, ++ unsigned int irq, Error **errp) ++{ ++ int fd; ++ size_t argsz; ++ IOHandler *fd_read; ++ EventNotifier *notifier; ++ struct vfio_irq_info *irq_info; ++ VFIODevice *vdev = &vapdev->vdev; ++ ++ switch (irq) { ++ case VFIO_AP_REQ_IRQ_INDEX: ++ notifier = &vapdev->req_notifier; ++ fd_read = vfio_ap_req_notifier_handler; ++ break; ++ default: ++ error_setg(errp, "vfio: Unsupported 
device irq(%d)", irq); ++ return; ++ } ++ ++ if (vdev->num_irqs < irq + 1) { ++ error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", ++ irq, vdev->num_irqs); ++ return; ++ } ++ ++ argsz = sizeof(*irq_info); ++ irq_info = g_malloc0(argsz); ++ irq_info->index = irq; ++ irq_info->argsz = argsz; ++ ++ if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, ++ irq_info) < 0 || irq_info->count < 1) { ++ error_setg_errno(errp, errno, "vfio: Error getting irq info"); ++ goto out_free_info; ++ } ++ ++ if (event_notifier_init(notifier, 0)) { ++ error_setg_errno(errp, errno, ++ "vfio: Unable to init event notifier for irq (%d)", ++ irq); ++ goto out_free_info; ++ } ++ ++ fd = event_notifier_get_fd(notifier); ++ qemu_set_fd_handler(fd, fd_read, NULL, vapdev); ++ ++ if (vfio_set_irq_signaling(vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, ++ errp)) { ++ qemu_set_fd_handler(fd, NULL, NULL, vapdev); ++ event_notifier_cleanup(notifier); ++ } ++ ++out_free_info: ++ g_free(irq_info); ++ ++} ++ ++static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, ++ unsigned int irq) ++{ ++ Error *err = NULL; ++ EventNotifier *notifier; ++ ++ switch (irq) { ++ case VFIO_AP_REQ_IRQ_INDEX: ++ notifier = &vapdev->req_notifier; ++ break; ++ default: ++ error_report("vfio: Unsupported device irq(%d)", irq); ++ return; ++ } ++ ++ if (vfio_set_irq_signaling(&vapdev->vdev, irq, 0, ++ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { ++ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); ++ } ++ ++ qemu_set_fd_handler(event_notifier_get_fd(notifier), ++ NULL, NULL, vapdev); ++ event_notifier_cleanup(notifier); ++} ++ + static void vfio_ap_realize(DeviceState *dev, Error **errp) + { + int ret; + char *mdevid; ++ Error *err = NULL; + VFIOGroup *vfio_group; + APDevice *apdev = AP_DEVICE(dev); + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); +@@ -116,6 +219,15 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) + goto out_get_dev_err; + } + ++ vfio_ap_register_irq_notifier(vapdev, 
VFIO_AP_REQ_IRQ_INDEX, &err); ++ if (err) { ++ /* ++ * Report this error, but do not make it a failing condition. ++ * Lack of this IRQ in the host does not prevent normal operation. ++ */ ++ error_report_err(err); ++ } ++ + return; + + out_get_dev_err: +@@ -129,6 +241,7 @@ static void vfio_ap_unrealize(DeviceState *dev) + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); + VFIOGroup *group = vapdev->vdev.group; + ++ vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); + vfio_ap_put_device(vapdev); + vfio_put_group(group); + } +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index c59692ce0b..ce464957c8 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -642,6 +642,15 @@ enum { + VFIO_CCW_NUM_IRQS + }; + ++/* ++ * The vfio-ap bus driver makes use of the following IRQ index mapping. ++ * Unimplemented IRQ types return a count of zero. ++ */ ++enum { ++ VFIO_AP_REQ_IRQ_INDEX, ++ VFIO_AP_NUM_IRQS ++}; ++ + /** + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, + * struct vfio_pci_hot_reset_info) +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch b/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch deleted file mode 100644 index 8bf1f61..0000000 --- a/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch +++ /dev/null @@ -1,125 +0,0 @@ -From ed90f91b61844abd2dff2eb970f721a6cf072235 Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 28 Oct 2022 15:47:57 -0400 -Subject: [PATCH 6/9] s390x/pci: coalesce unmap operations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 80c3a2c1d720057ae2a80b338ea06c9c6c804532 (clegoate/qemu-kvm-c9s) - -Currently, each unmapped page 
is handled as an individual iommu -region notification. Attempt to group contiguous unmap operations -into fewer notifications to reduce overhead. - -Signed-off-by: Matthew Rosato -Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -Signed-off-by: Thomas Huth -(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 51 insertions(+) - -diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c -index 7cc4bcf850..66e764f901 100644 ---- a/hw/s390x/s390-pci-inst.c -+++ b/hw/s390x/s390-pci-inst.c -@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - } - g_hash_table_remove(iommu->iotlb, &entry->iova); - inc_dma_avail(iommu); -+ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */ -+ goto out; - } else { - if (cache) { - if (cache->perm == entry->perm && -@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - dec_dma_avail(iommu); - } - -+ /* -+ * All associated iotlb entries have already been cleared, trigger the -+ * unmaps. -+ */ - memory_region_notify_iommu(&iommu->iommu_mr, 0, event); - - out: - return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; - } - -+static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova, -+ uint64_t len) -+{ -+ uint64_t remain = len, start = iova, end = start + len - 1, mask, size; -+ IOMMUTLBEvent event = { -+ .type = IOMMU_NOTIFIER_UNMAP, -+ .entry = { -+ .target_as = &address_space_memory, -+ .translated_addr = 0, -+ .perm = IOMMU_NONE, -+ }, -+ }; -+ -+ while (remain >= TARGET_PAGE_SIZE) { -+ mask = dma_aligned_pow2_mask(start, end, 64); -+ size = mask + 1; -+ event.entry.iova = start; -+ event.entry.addr_mask = mask; -+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); -+ start += size; -+ remain -= size; -+ } -+} -+ - int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - { - CPUS390XState *env = &cpu->env; -+ uint64_t iova, coalesce = 0; - uint32_t fh; - uint16_t error = 0; - S390PCIBusDevice *pbdev; -@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - break; - } - -+ /* -+ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps -+ * into as few notifier events as possible. 
-+ */ -+ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) { -+ if (coalesce == 0) { -+ iova = entry.iova; -+ } -+ coalesce += entry.len; -+ } else if (coalesce > 0) { -+ /* Unleash the coalesced unmap before processing a new map */ -+ s390_pci_batch_unmap(iommu, iova, coalesce); -+ coalesce = 0; -+ } -+ - start += entry.len; - while (entry.iova < start && entry.iova < end) { - if (dma_avail > 0 || entry.perm == IOMMU_NONE) { -@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - } - } - } -+ if (coalesce) { -+ /* Unleash the coalesced unmap before finishing rpcit */ -+ s390_pci_batch_unmap(iommu, iova, coalesce); -+ coalesce = 0; -+ } - if (again && dma_avail > 0) - goto retry; - err: --- -2.31.1 - diff --git a/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch b/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch deleted file mode 100644 index bbe2595..0000000 --- a/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 1ed1f8fc20a4883bc0bc1f58d299b0278abc5442 Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 9 Dec 2022 14:57:00 -0500 -Subject: [PATCH 8/9] s390x/pci: reset ISM passthrough devices on shutdown and - system reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] c531352b9d57f51ba938d4c46ee19a5706ade697 (clegoate/qemu-kvm-c9s) - -ISM device firmware stores unique state information that can -can cause a wholesale unmap of the associated IOMMU (e.g. 
when -we get a termination signal for QEMU) to trigger firmware errors -because firmware believes we are attempting to invalidate entries -that are still in-use by the guest OS (when in fact that guest is -in the process of being terminated or rebooted). -To alleviate this, register both a shutdown notifier (for unexpected -termination cases e.g. virsh destroy) as well as a reset callback -(for cases like guest OS reboot). For each of these scenarios, trigger -PCI device reset; this is enough to indicate to firmware that the IOMMU -is no longer in-use by the guest OS, making it safe to invalidate any -associated IOMMU entries. - -Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") -Signed-off-by: Matthew Rosato -Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context] -Signed-off-by: Thomas Huth -(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ - hw/s390x/s390-pci-vfio.c | 2 ++ - include/hw/s390x/s390-pci-bus.h | 5 +++++ - 3 files changed, 35 insertions(+) - -diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c -index 977e7daa15..02751f3597 100644 ---- a/hw/s390x/s390-pci-bus.c -+++ b/hw/s390x/s390-pci-bus.c -@@ -24,6 +24,8 @@ - #include "hw/pci/msi.h" - #include "qemu/error-report.h" - #include "qemu/module.h" -+#include "sysemu/reset.h" -+#include "sysemu/runstate.h" - - #ifndef DEBUG_S390PCI_BUS - #define DEBUG_S390PCI_BUS 0 -@@ -150,10 +152,30 @@ out: - psccb->header.response_code = cpu_to_be16(rc); - } - -+static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) -+{ -+ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, -+ shutdown_notifier); -+ -+ pci_device_reset(pbdev->pdev); -+} -+ -+static void s390_pci_reset_cb(void *opaque) -+{ -+ S390PCIBusDevice *pbdev = opaque; -+ -+ 
pci_device_reset(pbdev->pdev); -+} -+ - static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) - { - HotplugHandler *hotplug_ctrl; - -+ if (pbdev->pft == ZPCI_PFT_ISM) { -+ notifier_remove(&pbdev->shutdown_notifier); -+ qemu_unregister_reset(s390_pci_reset_cb, pbdev); -+ } -+ - /* Unplug the PCI device */ - if (pbdev->pdev) { - DeviceState *pdev = DEVICE(pbdev->pdev); -@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - pbdev->fh |= FH_SHM_VFIO; - pbdev->forwarding_assist = false; - } -+ /* Register shutdown notifier and reset callback for ISM devices */ -+ if (pbdev->pft == ZPCI_PFT_ISM) { -+ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; -+ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); -+ qemu_register_reset(s390_pci_reset_cb, pbdev); -+ } - } else { - pbdev->fh |= FH_SHM_EMUL; - /* Always intercept emulated devices */ -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index f7bf36cec8..f51190d466 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - /* The following values remain 0 until we support other FMB formats */ - pbdev->zpci_fn.fmbl = 0; - pbdev->zpci_fn.pft = 0; -+ /* Store function type separately for type-specific behavior */ -+ pbdev->pft = cap->pft; - - /* - * If appropriate, reduce the size of the supported DMA aperture reported -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 1c46e3a269..e0a9f9385b 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -39,6 +39,9 @@ - #define UID_CHECKING_ENABLED 0x01 - #define ZPCI_DTSM 0x40 - -+/* zPCI Function Types */ -+#define ZPCI_PFT_ISM 5 -+ - OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) - OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) - OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) -@@ -344,6 +347,7 
@@ struct S390PCIBusDevice { - uint16_t noi; - uint16_t maxstbl; - uint8_t sum; -+ uint8_t pft; - S390PCIGroup *pci_group; - ClpRspQueryPci zpci_fn; - S390MsixInfo msix; -@@ -352,6 +356,7 @@ struct S390PCIBusDevice { - MemoryRegion msix_notify_mr; - IndAddr *summary_ind; - IndAddr *indicator; -+ Notifier shutdown_notifier; - bool pci_unplug_request_processed; - bool unplug_requested; - bool interp; --- -2.31.1 - diff --git a/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch b/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch deleted file mode 100644 index 0992724..0000000 --- a/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ee69c8c57fe62fc200f749c4ce3927c88803644d Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 28 Oct 2022 15:47:58 -0400 -Subject: [PATCH 7/9] s390x/pci: shrink DMA aperture to be bound by vfio DMA - limit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] 0956bbb4773dd0085f6aed59d6284c704b4fed3b (clegoate/qemu-kvm-c9s) - -Currently, s390x-pci performs accounting against the vfio DMA -limit and triggers the guest to clean up mappings when the limit -is reached. Let's go a step further and also limit the size of -the supported DMA aperture reported to the guest based upon the -initial vfio DMA limit reported for the container (if less than -than the size reported by the firmware/host zPCI layer). 
This -avoids processing sections of the guest DMA table during global -refresh that, for common use cases, will never be used anway, and -makes exhausting the vfio DMA limit due to mismatch between guest -aperture size and host limit far less likely and more indicitive -of an error. - -Signed-off-by: Matthew Rosato -Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -Signed-off-by: Thomas Huth -(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-vfio.c | 11 +++++++++++ - include/hw/s390x/s390-pci-bus.h | 1 + - 2 files changed, 12 insertions(+) - -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index 5f0adb0b4a..f7bf36cec8 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, - cnt->users = 1; - cnt->avail = avail; - QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); -+ pbdev->iommu->max_dma_limit = avail; - return cnt; - } - -@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - struct vfio_info_cap_header *hdr; - struct vfio_device_info_cap_zpci_base *cap; - VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); -+ uint64_t vfio_size; - - hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); - -@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - /* The following values remain 0 until we support other FMB formats */ - pbdev->zpci_fn.fmbl = 0; - pbdev->zpci_fn.pft = 0; -+ -+ /* -+ * If appropriate, reduce the size of the supported DMA aperture reported -+ * to the guest based upon the vfio DMA limit. 
-+ */ -+ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; -+ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) { -+ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; -+ } - } - - static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 0605fcea24..1c46e3a269 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -278,6 +278,7 @@ struct S390PCIIOMMU { - uint64_t g_iota; - uint64_t pba; - uint64_t pal; -+ uint64_t max_dma_limit; - GHashTable *iotlb; - S390PCIDMACount *dma_limit; - }; --- -2.31.1 - diff --git a/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch b/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch new file mode 100644 index 0000000..ecf1353 --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch @@ -0,0 +1,129 @@ +From 3cab2a638a10ece2b76d9f33a3c5dc6f64f1bbaa Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Wed, 10 May 2023 12:55:31 +0200 +Subject: [PATCH 21/21] s390x/pv: Fix spurious warning with asynchronous + teardown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests +RH-Bugzilla: 2168500 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cédric Le Goater +RH-Commit: [2/2] cb690d3155ea22c6df00a4d75b72f501515e5556 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 + +Kernel commit 292a7d6fca33 ("KVM: s390: pv: fix asynchronous teardown +for small VMs") causes the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl to fail +if the VM is not larger than 2GiB. QEMU would attempt it and fail, +print an error message, and then proceed with a normal teardown. 
+ +Avoid attempting to use asynchronous teardown altogether when the VM is +not larger than 2 GiB. This will avoid triggering the error message and +also avoid pointless overhead; normal teardown is fast enough for small +VMs. + +Reported-by: Marc Hartmayer +Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot") +Link: https://lore.kernel.org/all/20230421085036.52511-2-imbrenda@linux.ibm.com/ +Signed-off-by: Claudio Imbrenda +Message-Id: <20230510105531.30623-2-imbrenda@linux.ibm.com> +Reviewed-by: Thomas Huth +[thuth: Fix inline function parameter in pv.h] +Signed-off-by: Thomas Huth +(cherry picked from commit 88693ab2a53f2f3d25cb39a7b5034ab391bc5a81) +--- + hw/s390x/pv.c | 10 ++++++++-- + hw/s390x/s390-virtio-ccw.c | 2 +- + include/hw/s390x/pv.h | 6 +++--- + 3 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 49ea38236c..b63f3784c6 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -13,6 +13,7 @@ + + #include + ++#include "qemu/units.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/kvm.h" +@@ -115,7 +116,7 @@ static void *s390_pv_do_unprot_async_fn(void *p) + return NULL; + } + +-bool s390_pv_vm_try_disable_async(void) ++bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) + { + /* + * t is only needed to create the thread; once qemu_thread_create +@@ -123,7 +124,12 @@ bool s390_pv_vm_try_disable_async(void) + */ + QemuThread t; + +- if (!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { ++ /* ++ * If the feature is not present or if the VM is not larger than 2 GiB, ++ * KVM_PV_ASYNC_CLEANUP_PREPARE fill fail; no point in attempting it. 
++ */ ++ if ((MACHINE(ms)->maxram_size <= 2 * GiB) || ++ !kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { + return false; + } + if (s390_pv_cmd(KVM_PV_ASYNC_CLEANUP_PREPARE, NULL) != 0) { +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 6a0b93c63d..d95c595f88 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -330,7 +330,7 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) + + static void s390_machine_unprotect(S390CcwMachineState *ms) + { +- if (!s390_pv_vm_try_disable_async()) { ++ if (!s390_pv_vm_try_disable_async(ms)) { + s390_pv_vm_disable(); + } + ms->pv = false; +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index 966306a9db..7b935e2246 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -14,10 +14,10 @@ + + #include "qapi/error.h" + #include "sysemu/kvm.h" ++#include "hw/s390x/s390-virtio-ccw.h" + + #ifdef CONFIG_KVM + #include "cpu.h" +-#include "hw/s390x/s390-virtio-ccw.h" + + static inline bool s390_is_pv(void) + { +@@ -41,7 +41,7 @@ static inline bool s390_is_pv(void) + int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); +-bool s390_pv_vm_try_disable_async(void); ++bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); + int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); + int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); + void s390_pv_prep_reset(void); +@@ -61,7 +61,7 @@ static inline bool s390_is_pv(void) { return false; } + static inline int s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} +-static inline bool s390_pv_vm_try_disable_async(void) { return false; } ++static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } + static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } + static 
inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } + static inline void s390_pv_prep_reset(void) {} +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch b/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch deleted file mode 100644 index c3383af..0000000 --- a/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 9452246e59a5f16f44fdf9a7d514b947faf1d5fc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 16 Jan 2023 18:46:05 +0100 -Subject: [PATCH 5/9] s390x/pv: Implement a CGS check helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 139: s390x/pv: Implement a CGS check helper -RH-Bugzilla: 2122523 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Commit: [1/1] 8551ce772b10de653b4e1c8be60aae60ec98b421 (clegoate/qemu-kvm-c9s) - -When a protected VM is started with the maximum number of CPUs (248), -the service call providing information on the CPUs requires more -buffer space than allocated and QEMU disgracefully aborts : - - LOADPARM=[........] - Using virtio-blk. - Using SCSI scheme. - ................................................................................... - qemu-system-s390x: KVM_S390_MEM_OP failed: Argument list too long - -When protected virtualization is initialized, compute the maximum -number of vCPUs supported by the machine and return useful information -to the user before the machine starts in case of error. 
- -Suggested-by: Thomas Huth -Reviewed-by: Thomas Huth -Signed-off-by: Cédric Le Goater -Message-Id: <20230116174607.2459498-2-clg@kaod.org> -Signed-off-by: Thomas Huth -(cherry picked from commit 75d7150c636569f6687f7e70a33be893be43eb5f) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/pv.c | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index 8dfe92d8df..8a1c71436b 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -20,6 +20,7 @@ - #include "exec/confidential-guest-support.h" - #include "hw/s390x/ipl.h" - #include "hw/s390x/pv.h" -+#include "hw/s390x/sclp.h" - #include "target/s390x/kvm/kvm_s390x.h" - - static bool info_valid; -@@ -249,6 +250,41 @@ struct S390PVGuestClass { - ConfidentialGuestSupportClass parent_class; - }; - -+/* -+ * If protected virtualization is enabled, the amount of data that the -+ * Read SCP Info Service Call can use is limited to one page. The -+ * available space also depends on the Extended-Length SCCB (ELS) -+ * feature which can take more buffer space to store feature -+ * information. This impacts the maximum number of CPUs supported in -+ * the machine. -+ */ -+static uint32_t s390_pv_get_max_cpus(void) -+{ -+ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ? 
-+ offsetof(ReadInfo, entries) : SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET; -+ -+ return (TARGET_PAGE_SIZE - offset_cpu) / sizeof(CPUEntry); -+} -+ -+static bool s390_pv_check_cpus(Error **errp) -+{ -+ MachineState *ms = MACHINE(qdev_get_machine()); -+ uint32_t pv_max_cpus = s390_pv_get_max_cpus(); -+ -+ if (ms->smp.max_cpus > pv_max_cpus) { -+ error_setg(errp, "Protected VMs support a maximum of %d CPUs", -+ pv_max_cpus); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) -+{ -+ return s390_pv_check_cpus(errp); -+} -+ - int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - { - if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { -@@ -261,6 +297,10 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - return -1; - } - -+ if (!s390_pv_guest_check(cgs, errp)) { -+ return -1; -+ } -+ - cgs->ready = true; - - return 0; --- -2.31.1 - diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch deleted file mode 100644 index 42114a1..0000000 --- a/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 51fcf352a97f2e99a6a3fb8ae663b45436304120 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 10 Jan 2023 14:25:34 +0100 -Subject: [PATCH 11/31] s390x/s390-virtio-ccw: Activate zPCI features on - s390-ccw-virtio-rhel8.6.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 133: s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 -RH-Bugzilla: 2159408 -RH-Acked-by: Thomas Huth -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 1ed82e56fe74a283a1726c4893dc3387e645072c (clegoate/qemu-kvm-c9s) - -commit c7b14d3af7 ("s390x/s390-virtio-ccw: Switch off zPCI 
enhancements -on older machines") activated zPCI enhancement features (interpretation -and forward assist) silently on the s390-ccw-virtio-rhel8.6.0 machine -for RHEL8.8. It didn't seem to be a problem since migration is not -possible but it broke LEAPP upgrade to RHEL9 when the machine is -defined with a passthrough device. Activate the zPCI features also on -RHEL9.2 for the machines to be alike in both latest RHEL distros. - -Upstream Status: RHEL-only -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2159408 - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-virtio-ccw.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index aa142a1a4e..4cdd59c394 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1234,8 +1234,14 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) - - static void ccw_machine_rhel860_class_options(MachineClass *mc) - { -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "on", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", }, -+ }; -+ - ccw_machine_rhel900_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - - /* All RHEL machines for prior major releases are deprecated */ - mc->deprecation_reason = rhel_old_machine_deprecation; -@@ -1259,8 +1265,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) - - static void ccw_machine_rhel850_class_options(MachineClass *mc) - { -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, -+ }; -+ - ccw_machine_rhel860_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - 
mc->smp_props.prefer_sockets = true; - } - DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); --- -2.31.1 - diff --git a/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch b/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch new file mode 100644 index 0000000..11dda3a --- /dev/null +++ b/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch @@ -0,0 +1,81 @@ +From 5dd7d26c034c26b2d4d9b91b8d1a7b605e19730f Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 12 Jul 2023 15:43:51 +0200 +Subject: [PATCH 02/12] scsi: cleanup scsi_clear_unit_attention() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention +RH-Bugzilla: 2176702 +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/3] b3a06a91644e44fae3d76d0fbe72448652db517a (sgarzarella/qemu-kvm-c-9-s) + +The previous commit moved the unit attention clearing when we create +the request. So now we can clean scsi_clear_unit_attention() to handle +only the case of the REPORT LUNS command: this is the only case in +which a UNIT ATTENTION is cleared without having been reported. 
+ +Suggested-by: Paolo Bonzini +Signed-off-by: Stefano Garzarella +Message-ID: <20230712134352.118655-3-sgarzare@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ba947dab98e7cd4337c70975bd255701a2a6aad8) +Signed-off-by: Stefano Garzarella +--- + hw/scsi/scsi-bus.c | 28 ++++++---------------------- + 1 file changed, 6 insertions(+), 22 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index 5d22313b9d..cecd26479e 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -828,26 +828,12 @@ static void scsi_clear_unit_attention(SCSIRequest *req) + return; + } + +- if (req->dev->unit_attention.key != UNIT_ATTENTION && +- req->bus->unit_attention.key != UNIT_ATTENTION) { +- return; +- } +- +- /* +- * If an INQUIRY command enters the enabled command state, +- * the device server shall [not] clear any unit attention condition; +- * See also MMC-6, paragraphs 6.5 and 6.6.2. +- */ +- if (req->cmd.buf[0] == INQUIRY || +- req->cmd.buf[0] == GET_CONFIGURATION || +- req->cmd.buf[0] == GET_EVENT_STATUS_NOTIFICATION) { +- return; +- } +- + if (req->dev->unit_attention.key == UNIT_ATTENTION) { + ua = &req->dev->unit_attention; +- } else { ++ } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { + ua = &req->bus->unit_attention; ++ } else { ++ return; + } + + /* +@@ -856,12 +842,10 @@ static void scsi_clear_unit_attention(SCSIRequest *req) + * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. 
+ */ + if (req->cmd.buf[0] == REPORT_LUNS && +- !(ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && +- ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq)) { +- return; ++ ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && ++ ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { ++ *ua = SENSE_CODE(NO_SENSE); + } +- +- *ua = SENSE_CODE(NO_SENSE); + } + + int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len) +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch b/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch new file mode 100644 index 0000000..cb3b24e --- /dev/null +++ b/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch @@ -0,0 +1,110 @@ +From 0a784c45a7b7ee32c36bf86eebb24c8431a89f49 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 12 Jul 2023 15:43:52 +0200 +Subject: [PATCH 03/12] scsi: clear unit attention only for REPORT LUNS + commands + +RH-Author: Stefano Garzarella +RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention +RH-Bugzilla: 2176702 +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/3] 01d5e112ef9ae204d96ceb01b4a453fdb4e8b669 (sgarzarella/qemu-kvm-c-9-s) + +scsi_clear_unit_attention() now only handles REPORTED LUNS DATA HAS +CHANGED. + +This only happens when we handle REPORT LUNS commands, so let's rename +the function in scsi_clear_reported_luns_changed() and call it only in +scsi_target_emulate_report_luns(). 
+ +Suggested-by: Paolo Bonzini +Signed-off-by: Stefano Garzarella +Message-ID: <20230712134352.118655-4-sgarzare@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 2eb5599e8a73e70a9e86a97120818ff95a43a23a) +Signed-off-by: Stefano Garzarella +--- + hw/scsi/scsi-bus.c | 34 +++++++++++----------------------- + 1 file changed, 11 insertions(+), 23 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index cecd26479e..9542410800 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -22,6 +22,7 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev); + static void scsi_req_dequeue(SCSIRequest *req); + static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len); + static void scsi_target_free_buf(SCSIRequest *req); ++static void scsi_clear_reported_luns_changed(SCSIRequest *req); + + static int next_scsi_bus; + +@@ -518,6 +519,14 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r) + + /* store the LUN list length */ + stl_be_p(&r->buf[0], len - 8); ++ ++ /* ++ * If a REPORT LUNS command enters the enabled command state, [...] ++ * the device server shall clear any pending unit attention condition ++ * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. ++ */ ++ scsi_clear_reported_luns_changed(&r->req); ++ + return true; + } + +@@ -816,18 +825,10 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) + return req->ops->get_buf(req); + } + +-static void scsi_clear_unit_attention(SCSIRequest *req) ++static void scsi_clear_reported_luns_changed(SCSIRequest *req) + { + SCSISense *ua; + +- /* +- * scsi_fetch_unit_attention_sense() already cleaned the unit attention +- * in this case. 
+- */ +- if (req->ops == &reqops_unit_attention) { +- return; +- } +- + if (req->dev->unit_attention.key == UNIT_ATTENTION) { + ua = &req->dev->unit_attention; + } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { +@@ -836,13 +837,7 @@ static void scsi_clear_unit_attention(SCSIRequest *req) + return; + } + +- /* +- * If a REPORT LUNS command enters the enabled command state, [...] +- * the device server shall clear any pending unit attention condition +- * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. +- */ +- if (req->cmd.buf[0] == REPORT_LUNS && +- ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && ++ if (ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && + ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { + *ua = SENSE_CODE(NO_SENSE); + } +@@ -1528,13 +1523,6 @@ void scsi_req_complete(SCSIRequest *req, int status) + req->dev->sense_is_ua = false; + } + +- /* +- * Unit attention state is now stored in the device's sense buffer +- * if the HBA didn't do autosense. Clear the pending unit attention +- * flags. 
+- */ +- scsi_clear_unit_attention(req); +- + scsi_req_ref(req); + scsi_req_dequeue(req); + req->bus->info->complete(req, req->residual); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch b/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch new file mode 100644 index 0000000..a41ae82 --- /dev/null +++ b/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch @@ -0,0 +1,132 @@ +From 562ea3a2d602cf41c548f3ddf52c43c04fded347 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 12 Jul 2023 15:43:50 +0200 +Subject: [PATCH 01/12] scsi: fetch unit attention when creating the request + +RH-Author: Stefano Garzarella +RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention +RH-Bugzilla: 2176702 +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/3] 04563caac45d0110ea65eda8e55472556cd317c0 (sgarzarella/qemu-kvm-c-9-s) + +Commit 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") split +calls to scsi_req_new() and scsi_req_enqueue() in the virtio-scsi device. +No ill effects were observed until commit 8cc5583abe ("virtio-scsi: Send +"REPORTED LUNS CHANGED" sense data upon disk hotplug events") added a +unit attention that was easy to trigger with device hotplug and +hot-unplug. + +Because the two calls were separated, all requests in the batch were +prepared calling scsi_req_new() to report a sense. The first one +submitted would report the right sense and reset it to NO_SENSE, while +the others reported CHECK_CONDITION with no sense data. This caused +SCSI errors in Linux. + +To solve this issue, let's fetch the unit attention as early as possible +when we prepare the request, so that only the first request in the batch +will use the unit attention SCSIReqOps and the others will not report +CHECK CONDITION. 
+ +Fixes: 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") +Fixes: 8cc5583abe ("virtio-scsi: Send "REPORTED LUNS CHANGED" sense data upon disk hotplug events") +Reported-by: Thomas Huth +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2176702 +Co-developed-by: Paolo Bonzini +Signed-off-by: Stefano Garzarella +Message-ID: <20230712134352.118655-2-sgarzare@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9472083e642bfb9bc836b38662baddd9bc964ebc) +Signed-off-by: Stefano Garzarella +--- + hw/scsi/scsi-bus.c | 36 +++++++++++++++++++++++++++++++++--- + include/hw/scsi/scsi.h | 1 + + 2 files changed, 34 insertions(+), 3 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index 3c20b47ad0..5d22313b9d 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -413,19 +413,35 @@ static const struct SCSIReqOps reqops_invalid_opcode = { + + /* SCSIReqOps implementation for unit attention conditions. */ + +-static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) ++static void scsi_fetch_unit_attention_sense(SCSIRequest *req) + { ++ SCSISense *ua = NULL; ++ + if (req->dev->unit_attention.key == UNIT_ATTENTION) { +- scsi_req_build_sense(req, req->dev->unit_attention); ++ ua = &req->dev->unit_attention; + } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { +- scsi_req_build_sense(req, req->bus->unit_attention); ++ ua = &req->bus->unit_attention; + } ++ ++ /* ++ * Fetch the unit attention sense immediately so that another ++ * scsi_req_new does not use reqops_unit_attention. 
++ */ ++ if (ua) { ++ scsi_req_build_sense(req, *ua); ++ *ua = SENSE_CODE(NO_SENSE); ++ } ++} ++ ++static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) ++{ + scsi_req_complete(req, CHECK_CONDITION); + return 0; + } + + static const struct SCSIReqOps reqops_unit_attention = { + .size = sizeof(SCSIRequest), ++ .init_req = scsi_fetch_unit_attention_sense, + .send_command = scsi_unit_attention + }; + +@@ -699,6 +715,11 @@ SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d, + object_ref(OBJECT(d)); + object_ref(OBJECT(qbus->parent)); + notifier_list_init(&req->cancel_notifiers); ++ ++ if (reqops->init_req) { ++ reqops->init_req(req); ++ } ++ + trace_scsi_req_alloc(req->dev->id, req->lun, req->tag); + return req; + } +@@ -798,6 +819,15 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) + static void scsi_clear_unit_attention(SCSIRequest *req) + { + SCSISense *ua; ++ ++ /* ++ * scsi_fetch_unit_attention_sense() already cleaned the unit attention ++ * in this case. ++ */ ++ if (req->ops == &reqops_unit_attention) { ++ return; ++ } ++ + if (req->dev->unit_attention.key != UNIT_ATTENTION && + req->bus->unit_attention.key != UNIT_ATTENTION) { + return; +diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h +index 6f23a7a73e..1787ddd01e 100644 +--- a/include/hw/scsi/scsi.h ++++ b/include/hw/scsi/scsi.h +@@ -108,6 +108,7 @@ int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num); + /* scsi-bus.c */ + struct SCSIReqOps { + size_t size; ++ void (*init_req)(SCSIRequest *req); + void (*free_req)(SCSIRequest *req); + int32_t (*send_command)(SCSIRequest *req, uint8_t *buf); + void (*read_data)(SCSIRequest *req); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch b/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch deleted file mode 100644 index ca61286..0000000 --- a/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 
0a4f5bcc2a6f8ac31431e971c1dce9e6ab2191c2 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:16 -0500 -Subject: [PATCH 01/12] scsi: protect req->aiocb with AioContext lock - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [1/3] 61727297bd31dfe18220b61f1d265ced0649c60d (stefanha/centos-stream-qemu-kvm) - -If requests are being processed in the IOThread when a SCSIDevice is -unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races -with I/O completion callbacks. Both threads load and store req->aiocb. -This can lead to assert(r->req.aiocb == NULL) failures and undefined -behavior. - -Protect r->req.aiocb with the AioContext lock to prevent the race. - -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/scsi-disk.c | 23 ++++++++++++++++------- - hw/scsi/scsi-generic.c | 11 ++++++----- - 2 files changed, 22 insertions(+), 12 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index e493c28814..5327f93f4c 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -273,9 +273,11 @@ static void scsi_aio_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - if (scsi_disk_req_check_error(r, ret, true)) { - goto done; - } -@@ -357,10 +359,11 @@ static void scsi_dma_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - 
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -393,10 +396,11 @@ static void scsi_read_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -446,10 +450,11 @@ static void scsi_do_read_cb(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -530,10 +535,11 @@ static void scsi_write_complete(void * opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -1737,10 +1743,11 @@ static void scsi_unmap_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb 
!= NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (scsi_disk_req_check_error(r, ret, true)) { - scsi_req_unref(&r->req); - g_free(data); -@@ -1816,9 +1823,11 @@ static void scsi_write_same_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - if (scsi_disk_req_check_error(r, ret, true)) { - goto done; - } -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 92cce20a4d..ac9fa662b4 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret) - SCSIGenericReq *r = (SCSIGenericReq *)opaque; - SCSIDevice *s = r->req.dev; - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); - scsi_command_complete_noio(r, ret); - aio_context_release(blk_get_aio_context(s->conf.blk)); - } -@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret) - SCSIDevice *s = r->req.dev; - int len; - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); - goto done; -@@ -386,11 +387,11 @@ static void scsi_write_complete(void * opaque, int ret) - - trace_scsi_generic_write_complete(ret); - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); - goto done; --- 
-2.39.1 - diff --git a/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch b/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch new file mode 100644 index 0000000..f1de158 --- /dev/null +++ b/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch @@ -0,0 +1,248 @@ +From 00f6e941e75f378c84c773a15efde7dd085d9ce3 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 19:40:14 +0100 +Subject: [PATCH 21/56] spice: move client_migrate_info command to ui/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [20/50] a587bb001b51a1f9fdf2fcfb0978bb931ae443b6 (peterx/qemu-kvm) + +It has nothing to do with migration, except for the "migrate" in the +name of the command. Move it with the rest of the ui commands. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit f9e1ef7482f1ee289b04f4b45702a1701bc8929d) +Signed-off-by: Peter Xu +--- + migration/migration-hmp-cmds.c | 17 ----------------- + migration/migration.c | 30 ------------------------------ + qapi/migration.json | 28 ---------------------------- + qapi/ui.json | 28 ++++++++++++++++++++++++++++ + ui/ui-hmp-cmds.c | 17 +++++++++++++++++ + ui/ui-qmp-cmds.c | 29 +++++++++++++++++++++++++++++ + 6 files changed, 74 insertions(+), 75 deletions(-) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 71da91967a..4e9f00e7dc 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -636,23 +636,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + hmp_handle_error(mon, err); + } + +-void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) +-{ +- Error *err = NULL; +- const char *protocol = qdict_get_str(qdict, "protocol"); +- const char *hostname = qdict_get_str(qdict, "hostname"); +- bool has_port = qdict_haskey(qdict, "port"); +- int port = qdict_get_try_int(qdict, "port", -1); +- bool has_tls_port = qdict_haskey(qdict, "tls-port"); +- int tls_port = qdict_get_try_int(qdict, "tls-port", -1); +- const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); +- +- qmp_client_migrate_info(protocol, hostname, +- has_port, port, has_tls_port, tls_port, +- cert_subject, &err); +- hmp_handle_error(mon, err); +-} +- + void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; +diff --git a/migration/migration.c b/migration/migration.c +index aa96ffdc5b..b745d829a4 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -63,7 +63,6 @@ + #include "sysemu/cpus.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" +-#include "ui/qemu-spice.h" + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +@@ -1018,35 +1017,6 @@ 
MigrationParameters *qmp_query_migrate_parameters(Error **errp) + return params; + } + +-void qmp_client_migrate_info(const char *protocol, const char *hostname, +- bool has_port, int64_t port, +- bool has_tls_port, int64_t tls_port, +- const char *cert_subject, +- Error **errp) +-{ +- if (strcmp(protocol, "spice") == 0) { +- if (!qemu_using_spice(errp)) { +- return; +- } +- +- if (!has_port && !has_tls_port) { +- error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); +- return; +- } +- +- if (qemu_spice.migrate_info(hostname, +- has_port ? port : -1, +- has_tls_port ? tls_port : -1, +- cert_subject)) { +- error_setg(errp, "Could not set up display for migration"); +- return; +- } +- return; +- } +- +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); +-} +- + AnnounceParameters *migrate_announce_params(void) + { + static AnnounceParameters ap; +diff --git a/qapi/migration.json b/qapi/migration.json +index c84fa10e86..2c35b7b9cf 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1203,34 +1203,6 @@ + { 'command': 'query-migrate-parameters', + 'returns': 'MigrationParameters' } + +-## +-# @client_migrate_info: +-# +-# Set migration information for remote display. This makes the server +-# ask the client to automatically reconnect using the new parameters +-# once migration finished successfully. Only implemented for SPICE. 
+-# +-# @protocol: must be "spice" +-# @hostname: migration target hostname +-# @port: spice tcp port for plaintext channels +-# @tls-port: spice tcp port for tls-secured channels +-# @cert-subject: server certificate subject +-# +-# Since: 0.14 +-# +-# Example: +-# +-# -> { "execute": "client_migrate_info", +-# "arguments": { "protocol": "spice", +-# "hostname": "virt42.lab.kraxel.org", +-# "port": 1234 } } +-# <- { "return": {} } +-# +-## +-{ 'command': 'client_migrate_info', +- 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', +- '*tls-port': 'int', '*cert-subject': 'str' } } +- + ## + # @migrate-start-postcopy: + # +diff --git a/qapi/ui.json b/qapi/ui.json +index 98322342f7..7ddd27a932 100644 +--- a/qapi/ui.json ++++ b/qapi/ui.json +@@ -1554,3 +1554,31 @@ + { 'command': 'display-update', + 'data': 'DisplayUpdateOptions', + 'boxed' : true } ++ ++## ++# @client_migrate_info: ++# ++# Set migration information for remote display. This makes the server ++# ask the client to automatically reconnect using the new parameters ++# once migration finished successfully. Only implemented for SPICE. 
++# ++# @protocol: must be "spice" ++# @hostname: migration target hostname ++# @port: spice tcp port for plaintext channels ++# @tls-port: spice tcp port for tls-secured channels ++# @cert-subject: server certificate subject ++# ++# Since: 0.14 ++# ++# Example: ++# ++# -> { "execute": "client_migrate_info", ++# "arguments": { "protocol": "spice", ++# "hostname": "virt42.lab.kraxel.org", ++# "port": 1234 } } ++# <- { "return": {} } ++# ++## ++{ 'command': 'client_migrate_info', ++ 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', ++ '*tls-port': 'int', '*cert-subject': 'str' } } +diff --git a/ui/ui-hmp-cmds.c b/ui/ui-hmp-cmds.c +index 5c456ecc02..c671389473 100644 +--- a/ui/ui-hmp-cmds.c ++++ b/ui/ui-hmp-cmds.c +@@ -458,3 +458,20 @@ hmp_screendump(Monitor *mon, const QDict *qdict) + end: + hmp_handle_error(mon, err); + } ++ ++void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) ++{ ++ Error *err = NULL; ++ const char *protocol = qdict_get_str(qdict, "protocol"); ++ const char *hostname = qdict_get_str(qdict, "hostname"); ++ bool has_port = qdict_haskey(qdict, "port"); ++ int port = qdict_get_try_int(qdict, "port", -1); ++ bool has_tls_port = qdict_haskey(qdict, "tls-port"); ++ int tls_port = qdict_get_try_int(qdict, "tls-port", -1); ++ const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); ++ ++ qmp_client_migrate_info(protocol, hostname, ++ has_port, port, has_tls_port, tls_port, ++ cert_subject, &err); ++ hmp_handle_error(mon, err); ++} +diff --git a/ui/ui-qmp-cmds.c b/ui/ui-qmp-cmds.c +index dbc4afcd73..a37a7024f3 100644 +--- a/ui/ui-qmp-cmds.c ++++ b/ui/ui-qmp-cmds.c +@@ -175,3 +175,32 @@ void qmp_display_update(DisplayUpdateOptions *arg, Error **errp) + abort(); + } + } ++ ++void qmp_client_migrate_info(const char *protocol, const char *hostname, ++ bool has_port, int64_t port, ++ bool has_tls_port, int64_t tls_port, ++ const char *cert_subject, ++ Error **errp) ++{ ++ if (strcmp(protocol, "spice") == 0) { ++ if 
(!qemu_using_spice(errp)) { ++ return; ++ } ++ ++ if (!has_port && !has_tls_port) { ++ error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); ++ return; ++ } ++ ++ if (qemu_spice.migrate_info(hostname, ++ has_port ? port : -1, ++ has_tls_port ? tls_port : -1, ++ cert_subject)) { ++ error_setg(errp, "Could not set up display for migration"); ++ return; ++ } ++ return; ++ } ++ ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch b/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch deleted file mode 100644 index a8e3957..0000000 --- a/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:04 +0100 -Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s) - -The subtree drain was introduced in commit b1e1af394d9 as a way to avoid -graph changes between finding the base node and changing the block graph -as necessary on completion of the image streaming job. - -The block graph could change between these two points because -bdrv_set_backing_hd() first drains the parent node, which involved -polling and can do anything. - -Subtree draining was an imperfect way to make this less likely (because -with it, fewer callbacks are called during this window). Everyone agreed -that it's not really the right solution, and it was only committed as a -stopgap solution. 
- -This replaces the subtree drain with a solution that simply drains the -parent node before we try to find the base node, and then call a version -of bdrv_set_backing_hd() that doesn't drain, but just asserts that the -parent node is already drained. - -This way, any graph changes caused by draining happen before we start -looking at the graph and things stay consistent between finding the base -node and changing the graph. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-10-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6) -Signed-off-by: Stefano Garzarella ---- - block.c | 17 ++++++++++++++--- - block/stream.c | 26 ++++++++++++++++---------- - include/block/block-global-state.h | 3 +++ - 3 files changed, 33 insertions(+), 13 deletions(-) - -diff --git a/block.c b/block.c -index b3449a312e..5330e89903 100644 ---- a/block.c -+++ b/block.c -@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, - return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); - } - --int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, -- Error **errp) -+int bdrv_set_backing_hd_drained(BlockDriverState *bs, -+ BlockDriverState *backing_hd, -+ Error **errp) - { - int ret; - Transaction *tran = tran_new(); - - GLOBAL_STATE_CODE(); -- bdrv_drained_begin(bs); -+ assert(bs->quiesce_counter > 0); - - ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); - if (ret < 0) { -@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - ret = bdrv_refresh_perms(bs, errp); - out: - tran_finalize(tran, ret); -+ return ret; -+} - -+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, -+ Error **errp) -+{ -+ int ret; -+ GLOBAL_STATE_CODE(); -+ -+ bdrv_drained_begin(bs); -+ ret = bdrv_set_backing_hd_drained(bs, backing_hd, 
errp); - bdrv_drained_end(bs); - - return ret; -diff --git a/block/stream.c b/block/stream.c -index 694709bd25..8744ad103f 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -64,13 +64,16 @@ static int stream_prepare(Job *job) - bdrv_cor_filter_drop(s->cor_filter_bs); - s->cor_filter_bs = NULL; - -- bdrv_subtree_drained_begin(s->above_base); -+ /* -+ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain -+ * already here and use bdrv_set_backing_hd_drained() instead because -+ * the polling during drained_begin() might change the graph, and if we do -+ * this only later, we may end up working with the wrong base node (or it -+ * might even have gone away by the time we want to use it). -+ */ -+ bdrv_drained_begin(unfiltered_bs); - - base = bdrv_filter_or_cow_bs(s->above_base); -- if (base) { -- bdrv_ref(base); -- } -- - unfiltered_base = bdrv_skip_filters(base); - - if (bdrv_cow_child(unfiltered_bs)) { -@@ -82,7 +85,13 @@ static int stream_prepare(Job *job) - } - } - -- bdrv_set_backing_hd(unfiltered_bs, base, &local_err); -+ bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); -+ -+ /* -+ * This call will do I/O, so the graph can change again from here on. -+ * We have already completed the graph change, so we are not in danger -+ * of operating on the wrong node any more if this happens. 
-+ */ - ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); - if (local_err) { - error_report_err(local_err); -@@ -92,10 +101,7 @@ static int stream_prepare(Job *job) - } - - out: -- if (base) { -- bdrv_unref(base); -- } -- bdrv_subtree_drained_end(s->above_base); -+ bdrv_drained_end(unfiltered_bs); - return ret; - } - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index c7bd4a2088..00e0cf8aea 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, - BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); - int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp); -+int bdrv_set_backing_hd_drained(BlockDriverState *bs, -+ BlockDriverState *backing_hd, -+ Error **errp); - int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - const char *bdref_key, Error **errp); - BlockDriverState *bdrv_open(const char *filename, const char *reference, --- -2.31.1 - diff --git a/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch b/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch new file mode 100644 index 0000000..43c239a --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch @@ -0,0 +1,203 @@ +From 03011d00cfb5862edb7394a9b79b269198af5c89 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:48:34 -0400 +Subject: [PATCH 7/7] target/i386: Add EPYC-Genoa model to support Zen 4 + processor series + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/7] 158091c691169a5d30c7c8005371ee7a0d9fc4ce (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit 166b1741884dd4fd7090b753cd7333868457a29b +Author: Babu Moger 
+Date: Thu May 4 15:53:12 2023 -0500 + + target/i386: Add EPYC-Genoa model to support Zen 4 processor series + + Adds the support for AMD EPYC Genoa generation processors. The model + display for the new processor will be EPYC-Genoa. + + Adds the following new feature bits on top of the feature bits from + the previous generation EPYC models. + + avx512f : AVX-512 Foundation instruction + avx512dq : AVX-512 Doubleword & Quadword Instruction + avx512ifma : AVX-512 Integer Fused Multiply Add instruction + avx512cd : AVX-512 Conflict Detection instruction + avx512bw : AVX-512 Byte and Word Instructions + avx512vl : AVX-512 Vector Length Extension Instructions + avx512vbmi : AVX-512 Vector Byte Manipulation Instruction + avx512_vbmi2 : AVX-512 Additional Vector Byte Manipulation Instruction + gfni : AVX-512 Galois Field New Instructions + avx512_vnni : AVX-512 Vector Neural Network Instructions + avx512_bitalg : AVX-512 Bit Algorithms, add bit algorithms Instructions + avx512_vpopcntdq: AVX-512 AVX-512 Vector Population Count Doubleword and + Quadword Instructions + avx512_bf16 : AVX-512 BFLOAT16 instructions + la57 : 57-bit virtual address support (5-level Page Tables) + vnmi : Virtual NMI (VNMI) allows the hypervisor to inject the NMI + into the guest without using Event Injection mechanism + meaning not required to track the guest NMI and intercepting + the IRET. + auto-ibrs : The AMD Zen4 core supports a new feature called Automatic IBRS. + It is a "set-and-forget" feature that means that, unlike e.g., + s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation + resources automatically across CPL transitions. 
+ + Signed-off-by: Babu Moger + Message-Id: <20230504205313.225073-8-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 122 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f1baefe775..b27db050a2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1973,6 +1973,56 @@ static const CPUCaches epyc_milan_v2_cache_info = { + }, + }; + ++static const CPUCaches epyc_genoa_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 1 * MiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 2048, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 32 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 32768, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = false, ++ }, ++}; ++ + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * +@@ -4493,6 +4543,78 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .name = "EPYC-Genoa", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_AMD, ++ .family = 25, ++ .model = 17, ++ .stepping = 0, ++ .features[FEAT_1_EDX] = ++ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | ++ CPUID_PSE36 | 
CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | ++ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | ++ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | ++ CPUID_VME | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | ++ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | ++ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | ++ CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | ++ CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | ++ CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | ++ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | ++ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | ++ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | ++ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | ++ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | ++ CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, ++ .features[FEAT_8000_0021_EAX] = ++ CPUID_8000_0021_EAX_No_NESTED_DATA_BP | ++ CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | ++ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | ++ CPUID_8000_0021_EAX_AUTO_IBRS, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | ++ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | ++ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | ++ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, ++ 
.features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | ++ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | ++ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | ++ CPUID_7_0_ECX_RDPID, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX512_BF16, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_SVM] = ++ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | ++ CPUID_SVM_SVME_ADDR_CHK, ++ .xlevel = 0x80000022, ++ .model_id = "AMD EPYC-Genoa Processor", ++ .cache_info = &epyc_genoa_cache_info, ++ }, + }; + + /* +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch b/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch new file mode 100644 index 0000000..5e8f79b --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch @@ -0,0 +1,105 @@ +From 95c5cee20741b055dea9ac3ad3176bbaa1eaf705 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:46:25 -0400 +Subject: [PATCH 6/7] target/i386: Add VNMI and automatic IBRS feature bits +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/7] 24c0fb08973aa2615817f67576550ce2efadb75c (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit 62a798d4bc2c3e767d94670776c77a7df274d7c5 +Author: Babu Moger +Date: Thu May 4 15:53:11 2023 -0500 + + target/i386: Add VNMI and automatic IBRS feature bits + + Add the following featute bits. 
+ + vnmi: Virtual NMI (VNMI) allows the hypervisor to inject the NMI into the + guest without using Event Injection mechanism meaning not required to + track the guest NMI and intercepting the IRET. + The presence of this feature is indicated via the CPUID function + 0x8000000A_EDX[25]. + + automatic-ibrs : + The AMD Zen4 core supports a new feature called Automatic IBRS. + It is a "set-and-forget" feature that means that, unlike e.g., + s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation + resources automatically across CPL transitions. + The presence of this feature is indicated via the CPUID function + 0x80000021_EAX[8]. + + The documention for the features are available in the links below. + a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, + Revision B1 Processors + b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision + 40332 4.05 Date October 2022 + + Signed-off-by: Santosh Shukla + Signed-off-by: Kim Phillips + Signed-off-by: Babu Moger + Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip + Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf + Message-Id: <20230504205313.225073-7-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 3 +++ + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index bbddc682df..f1baefe775 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -806,7 +806,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "pfthreshold", "avic", NULL, "v-vmsave-vmload", + "vgif", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, "vnmi", NULL, NULL, + "svme-addr-chk", NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, +@@ -925,7 +925,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .feat_names = { + "no-nested-data-bp", NULL, "lfence-always-serializing", 
NULL, + NULL, NULL, "null-sel-clr-base", NULL, +- NULL, NULL, NULL, NULL, ++ "auto-ibrs", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index c37abf62ae..f7d225e4f1 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -773,6 +773,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_SVM_AVIC (1U << 13) + #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) + #define CPUID_SVM_VGIF (1U << 16) ++#define CPUID_SVM_VNMI (1U << 25) + #define CPUID_SVM_SVME_ADDR_CHK (1U << 28) + + /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ +@@ -948,6 +949,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) + /* Null Selector Clears Base */ + #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) ++/* Automatic IBRS */ ++#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) + + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch b/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch new file mode 100644 index 0000000..772bbbd --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch @@ -0,0 +1,94 @@ +From 2d7fb99c02a7666f1d8fe70a4749f0b7771a68ed Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:29:55 -0400 +Subject: [PATCH 3/7] target/i386: Add a couple of feature bits in + 8000_0008_EBX + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/7] b11020b249d4ecc2e3e1ddf4fdc4b52c42ec2642 (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit bb039a230e6a7920d71d21fa9afee2653a678c48 +Author: Babu Moger +Date: Thu May 4 15:53:08 2023 -0500 + + 
target/i386: Add a couple of feature bits in 8000_0008_EBX + + Add the following feature bits. + + amd-psfd : Predictive Store Forwarding Disable: + PSF is a hardware-based micro-architectural optimization + designed to improve the performance of code execution by + predicting address dependencies between loads and stores. + While SSBD (Speculative Store Bypass Disable) disables both + PSF and speculative store bypass, PSFD only disables PSF. + PSFD may be desirable for the software which is concerned + with the speculative behavior of PSF but desires a smaller + performance impact than setting SSBD. + Depends on the following kernel commit: + b73a54321ad8 ("KVM: x86: Expose Predictive Store Forwarding Disable") + + stibp-always-on : + Single Thread Indirect Branch Prediction mode has enhanced + performance and may be left always on. + + The documentation for the features are available in the links below. + a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, + Revision B1 Processors + b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING + + Signed-off-by: Babu Moger + Acked-by: Michael S. 
Tsirkin + Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf + Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip + Message-Id: <20230504205313.225073-4-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 4 ++++ + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 8aa7eb611c..c8f88aefc7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -911,10 +911,10 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, "wbnoinvd", NULL, NULL, + "ibpb", NULL, "ibrs", "amd-stibp", +- NULL, NULL, NULL, NULL, ++ NULL, "stibp-always-on", NULL, NULL, + NULL, NULL, NULL, NULL, + "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, +- NULL, NULL, NULL, NULL, ++ "amd-psfd", NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, + .tcg_features = 0, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index c28b9df217..81d2200543 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -934,8 +934,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_8000_0008_EBX_IBRS (1U << 14) + /* Single Thread Indirect Branch Predictors */ + #define CPUID_8000_0008_EBX_STIBP (1U << 15) ++/* STIBP mode has enhanced performance and may be left always on */ ++#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) + /* Speculative Store Bypass Disable */ + #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) ++/* Predictive Store Forwarding Disable */ ++#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) + + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch b/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch new file mode 100644 index 0000000..c714e49 --- /dev/null +++ 
b/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch @@ -0,0 +1,126 @@ +From 2a2f74c53258ef67034307b59afe2f4c679afaa2 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:32:00 -0400 +Subject: [PATCH 4/7] target/i386: Add feature bits for CPUID_Fn80000021_EAX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/7] 133044a7245226308406a684a875e1f96a394516 (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit b70eec312b185197d639bff689007727e596afd1 +Author: Babu Moger +Date: Thu May 4 15:53:09 2023 -0500 + + target/i386: Add feature bits for CPUID_Fn80000021_EAX + + Add the following feature bits. + no-nested-data-bp : Processor ignores nested data breakpoints. + lfence-always-serializing : LFENCE instruction is always serializing. + null-sel-cls-base : Null Selector Clears Base. When this bit is + set, a null segment load clears the segment base. + + The documentation for the features are available in the links below. + a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, + Revision B1 Processors + b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision + 40332 4.05 Date October 2022 + + Signed-off-by: Babu Moger + Acked-by: Michael S. 
Tsirkin + Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip + Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf + Message-Id: <20230504205313.225073-5-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 24 ++++++++++++++++++++++++ + target/i386/cpu.h | 8 ++++++++ + 2 files changed, 32 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index c8f88aefc7..7ddebbaa3c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -920,6 +920,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .tcg_features = 0, + .unmigratable_flags = 0, + }, ++ [FEAT_8000_0021_EAX] = { ++ .type = CPUID_FEATURE_WORD, ++ .feat_names = { ++ "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, ++ NULL, NULL, "null-sel-clr-base", NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, ++ .tcg_features = 0, ++ .unmigratable_flags = 0, ++ }, + [FEAT_XSAVE] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +@@ -6156,6 +6172,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; ++ case 0x80000021: ++ *eax = env->features[FEAT_8000_0021_EAX]; ++ *ebx = *ecx = *edx = 0; ++ break; + default: + /* reserved values: zero */ + *eax = 0; +@@ -6585,6 +6605,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); + } + ++ if (env->features[FEAT_8000_0021_EAX]) { ++ x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); ++ } ++ + /* SGX requires CPUID[0x12] for EPC enumeration */ + if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); +diff --git a/target/i386/cpu.h 
b/target/i386/cpu.h +index 81d2200543..c37abf62ae 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -600,6 +600,7 @@ typedef enum FeatureWord { + FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ + FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ + FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ ++ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ + FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ + FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ + FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ +@@ -941,6 +942,13 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + /* Predictive Store Forwarding Disable */ + #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) + ++/* Processor ignores nested data breakpoints */ ++#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) ++/* LFENCE is always serializing */ ++#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) ++/* Null Selector Clears Base */ ++#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) ++ + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) + #define CPUID_XSAVE_XGETBV1 (1U << 2) +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch b/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch new file mode 100644 index 0000000..9bb4bf9 --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch @@ -0,0 +1,152 @@ +From a8180665019d537ee9775614627bf9eb8bd4770e Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:35:33 -0400 +Subject: [PATCH 5/7] target/i386: Add missing feature bits in EPYC-Milan model +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/7] 8f77315c8d7010564423df3e3c594c90fd5f9c00 (bdas1/qemu-kvm) + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit 27f03be6f59d04bd5673ba1e1628b2b490f9a9ff +Author: Babu Moger +Date: Thu May 4 15:53:10 2023 -0500 + + target/i386: Add missing feature bits in EPYC-Milan model + + Add the following feature bits for EPYC-Milan model and bump the version. + vaes : Vector VAES(ENC|DEC), VAES(ENC|DEC)LAST instruction support + vpclmulqdq : Vector VPCLMULQDQ instruction support + stibp-always-on : Single Thread Indirect Branch Prediction Mode has enhanced + performance and may be left Always on + amd-psfd : Predictive Store Forward Disable + no-nested-data-bp : Processor ignores nested data breakpoints + lfence-always-serializing : LFENCE instruction is always serializing + null-sel-clr-base : Null Selector Clears Base. When this bit is + set, a null segment load clears the segment base + + These new features will be added in EPYC-Milan-v2. The "-cpu help" output + after the change will be. + + x86 EPYC-Milan (alias configured by machine type) + x86 EPYC-Milan-v1 AMD EPYC-Milan Processor + x86 EPYC-Milan-v2 AMD EPYC-Milan Processor + + The documentation for the features are available in the links below. + a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, + Revision B1 Processors + b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING + c. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision + 40332 4.05 Date October 2022 + + Signed-off-by: Babu Moger + Acked-by: Michael S. 
Tsirkin + Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip + Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf + Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf + Message-Id: <20230504205313.225073-6-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 70 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 7ddebbaa3c..bbddc682df 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1923,6 +1923,56 @@ static const CPUCaches epyc_milan_cache_info = { + }, + }; + ++static const CPUCaches epyc_milan_v2_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 32 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 32768, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = false, ++ }, ++}; ++ + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * +@@ -4422,6 +4472,26 @@ static const X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x8000001E, + .model_id = "AMD EPYC-Milan 
Processor", + .cache_info = &epyc_milan_cache_info, ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { ++ .version = 2, ++ .props = (PropValue[]) { ++ { "model-id", ++ "AMD EPYC-Milan-v2 Processor" }, ++ { "vaes", "on" }, ++ { "vpclmulqdq", "on" }, ++ { "stibp-always-on", "on" }, ++ { "amd-psfd", "on" }, ++ { "no-nested-data-bp", "on" }, ++ { "lfence-always-serializing", "on" }, ++ { "null-sel-clr-base", "on" }, ++ { /* end of list */ } ++ }, ++ .cache_info = &epyc_milan_v2_cache_info ++ }, ++ { /* end of list */ } ++ } + }, + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch b/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch new file mode 100644 index 0000000..40c289a --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch @@ -0,0 +1,192 @@ +From 92f0b5d0c7a841a21cabbc6efc1d7baf0e5a3e0f Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:26:12 -0400 +Subject: [PATCH 2/7] target/i386: Add new EPYC CPU versions with updated + cache_info + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/7] 71a2fd907636733f86729bc9328600f6f9306eaf (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit d7c72735f618a7ee27ee109d8b1468193734606a +Author: Michael Roth +Date: Thu May 4 15:53:07 2023 -0500 + + target/i386: Add new EPYC CPU versions with updated cache_info + + Introduce new EPYC cpu versions: EPYC-v4 and EPYC-Rome-v3. + The only difference vs. older models is an updated cache_info with + the 'complex_indexing' bit unset, since this bit is not currently + defined for AMD and may cause problems should it be used for + something else in the future. Setting this bit will also cause + CPUID validation failures when running SEV-SNP guests. 
+ + Signed-off-by: Michael Roth + Signed-off-by: Babu Moger + Acked-by: Michael S. Tsirkin + Message-Id: <20230504205313.225073-3-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 118 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 3558c92ed0..8aa7eb611c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1707,6 +1707,56 @@ static const CPUCaches epyc_cache_info = { + }, + }; + ++static CPUCaches epyc_v4_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 64 * KiB, ++ .line_size = 64, ++ .associativity = 4, ++ .partitions = 1, ++ .sets = 256, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 8 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 8192, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = false, ++ }, ++}; ++ + static const CPUCaches epyc_rome_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, +@@ -1757,6 +1807,56 @@ static const CPUCaches epyc_rome_cache_info = { + }, + }; + ++static const CPUCaches epyc_rome_v3_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ 
.lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 16 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 16384, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = false, ++ }, ++}; ++ + static const CPUCaches epyc_milan_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, +@@ -4112,6 +4212,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .version = 4, ++ .props = (PropValue[]) { ++ { "model-id", ++ "AMD EPYC-v4 Processor" }, ++ { /* end of list */ } ++ }, ++ .cache_info = &epyc_v4_cache_info ++ }, + { /* end of list */ } + } + }, +@@ -4231,6 +4340,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .version = 3, ++ .props = (PropValue[]) { ++ { "model-id", ++ "AMD EPYC-Rome-v3 Processor" }, ++ { /* end of list */ } ++ }, ++ .cache_info = &epyc_rome_v3_cache_info ++ }, + { /* end of list */ } + } + }, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch b/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch deleted file mode 100644 index 52e73e7..0000000 --- a/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch +++ /dev/null @@ -1,144 +0,0 @@ -From e419493e6ec188461aa6f06c1b1cdc8a698859df Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 
14 Jan 2023 15:21:03 -1000 -Subject: [PATCH 6/8] target/i386: Fix 32-bit AD[CO]X insns in 64-bit mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [6/7] 0fa4d3858319d4f877a5b3f31776121a72e2c57a (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -Failure to truncate the inputs results in garbage for the carry-out. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1373 -Signed-off-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20230115012103.3131796-1-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 6fbef9426bac7184b5d5887589d8386e732865eb) ---- - target/i386/tcg/emit.c.inc | 2 + - tests/tcg/x86_64/Makefile.target | 3 ++ - tests/tcg/x86_64/adox.c | 69 ++++++++++++++++++++++++++++++++ - 3 files changed, 74 insertions(+) - create mode 100644 tests/tcg/x86_64/adox.c - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 0d7c6e80ae..e61ae9a2e9 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1037,6 +1037,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - #ifdef TARGET_X86_64 - case MO_32: - /* If TL is 64-bit just do everything in 64-bit arithmetic. 
*/ -+ tcg_gen_ext32u_tl(s->T0, s->T0); -+ tcg_gen_ext32u_tl(s->T1, s->T1); - tcg_gen_add_i64(s->T0, s->T0, s->T1); - tcg_gen_add_i64(s->T0, s->T0, carry_in); - tcg_gen_shri_i64(carry_out, s->T0, 32); -diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target -index 4eac78293f..e64aab1b81 100644 ---- a/tests/tcg/x86_64/Makefile.target -+++ b/tests/tcg/x86_64/Makefile.target -@@ -12,11 +12,14 @@ ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET)) - X86_64_TESTS += vsyscall - X86_64_TESTS += noexec - X86_64_TESTS += cmpxchg -+X86_64_TESTS += adox - TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 - else - TESTS=$(MULTIARCH_TESTS) - endif - -+adox: CFLAGS=-O2 -+ - run-test-i386-ssse3: QEMU_OPTS += -cpu max - run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max - -diff --git a/tests/tcg/x86_64/adox.c b/tests/tcg/x86_64/adox.c -new file mode 100644 -index 0000000000..36be644c8b ---- /dev/null -+++ b/tests/tcg/x86_64/adox.c -@@ -0,0 +1,69 @@ -+/* See if ADOX give expected results */ -+ -+#include -+#include -+#include -+ -+static uint64_t adoxq(bool *c_out, uint64_t a, uint64_t b, bool c) -+{ -+ asm ("addl $0x7fffffff, %k1\n\t" -+ "adoxq %2, %0\n\t" -+ "seto %b1" -+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); -+ *c_out = c; -+ return a; -+} -+ -+static uint64_t adoxl(bool *c_out, uint64_t a, uint64_t b, bool c) -+{ -+ asm ("addl $0x7fffffff, %k1\n\t" -+ "adoxl %k2, %k0\n\t" -+ "seto %b1" -+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); -+ *c_out = c; -+ return a; -+} -+ -+int main() -+{ -+ uint64_t r; -+ bool c; -+ -+ r = adoxq(&c, 0, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0x100000000, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxq(&c, 0, 0, 1); -+ assert(r == 1); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0, 0, 1); -+ assert(r == 1); -+ assert(c == 0); -+ -+ r = adoxq(&c, -1, -1, 0); -+ assert(r == -2); -+ assert(c == 1); -+ -+ r = adoxl(&c, -1, 
-1, 0); -+ assert(r == 0xfffffffe); -+ assert(c == 1); -+ -+ r = adoxq(&c, -1, -1, 1); -+ assert(r == -1); -+ assert(c == 1); -+ -+ r = adoxl(&c, -1, -1, 1); -+ assert(r == 0xffffffff); -+ assert(c == 1); -+ -+ return 0; -+} --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch b/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch deleted file mode 100644 index 0c28c7e..0000000 --- a/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch +++ /dev/null @@ -1,110 +0,0 @@ -From a019c203f0148e5fbb20e102a17453806f5296b6 Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:05:42 -1000 -Subject: [PATCH 3/8] target/i386: Fix BEXTR instruction - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [3/7] bd1e3b26c72d7152b44be2d34308fd40dc106424 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -There were two problems here: not limiting the input to operand bits, -and not correctly handling large extraction length. 
- -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1372 -Signed-off-by: Richard Henderson -Message-Id: <20230114230542.3116013-3-richard.henderson@linaro.org> -Cc: qemu-stable@nongnu.org -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Signed-off-by: Paolo Bonzini -(cherry picked from commit b14c0098975264ed03144f145bca0179a6763a07) ---- - target/i386/tcg/emit.c.inc | 22 +++++++++++----------- - tests/tcg/i386/test-i386-bmi2.c | 12 ++++++++++++ - 2 files changed, 23 insertions(+), 11 deletions(-) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 7037ff91c6..99f6ba6e19 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1078,30 +1078,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; -- TCGv bound, zero; -+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ TCGv zero = tcg_constant_tl(0); -+ TCGv mone = tcg_constant_tl(-1); - - /* - * Extract START, and shift the operand. - * Shifts larger than operand size get zeros. - */ - tcg_gen_ext8u_tl(s->A0, s->T1); -+ if (TARGET_LONG_BITS == 64 && ot == MO_32) { -+ tcg_gen_ext32u_tl(s->T0, s->T0); -+ } - tcg_gen_shr_tl(s->T0, s->T0, s->A0); - -- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -- zero = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); - - /* -- * Extract the LEN into a mask. Lengths larger than -- * operand size get all ones. -+ * Extract the LEN into an inverse mask. Lengths larger than -+ * operand size get all zeros, length 0 gets all ones. 
- */ - tcg_gen_extract_tl(s->A0, s->T1, 8, 8); -- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound); -- -- tcg_gen_movi_tl(s->T1, 1); -- tcg_gen_shl_tl(s->T1, s->T1, s->A0); -- tcg_gen_subi_tl(s->T1, s->T1, 1); -- tcg_gen_and_tl(s->T0, s->T0, s->T1); -+ tcg_gen_shl_tl(s->T1, mone, s->A0); -+ tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); -+ tcg_gen_andc_tl(s->T0, s->T0, s->T1); - - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 3c3ef85513..982d4abda4 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) { - result = bextrq(mask, 0x10f8); - assert(result == 0); - -+ result = bextrq(0xfedcba9876543210ull, 0x7f00); -+ assert(result == 0xfedcba9876543210ull); -+ - result = blsiq(0x30); - assert(result == 0x10); - -@@ -164,6 +167,15 @@ int main(int argc, char *argv[]) { - result = bextrl(mask, 0x1038); - assert(result == 0); - -+ result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018); -+ assert(result == 0x5a); -+ -+ result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00); -+ assert(result == 0x76543210u); -+ -+ result = bextrl(-1, 0); -+ assert(result == 0); -+ - result = blsil(0xffff); - assert(result == 1); - --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch b/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch deleted file mode 100644 index bcf79f4..0000000 --- a/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch +++ /dev/null @@ -1,77 +0,0 @@ -From d49e5d193dfccf6f5cfa98ccce5bd491478d563d Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:32:06 -1000 -Subject: [PATCH 7/8] target/i386: Fix BZHI instruction - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov 
-RH-Acked-by: Bandan Das -RH-Commit: [7/7] ad6b343c09c0304ac32cc68670c49d1fc12d8cf8 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -We did not correctly handle N >= operand size. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1374 -Signed-off-by: Richard Henderson -Message-Id: <20230114233206.3118472-1-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9ad2ba6e8e7fc195d0dd0b76ab38bd2fceb1bdd4) ---- - target/i386/tcg/emit.c.inc | 14 +++++++------- - tests/tcg/i386/test-i386-bmi2.c | 3 +++ - 2 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index e61ae9a2e9..0d01e13002 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1147,20 +1147,20 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; -- TCGv bound; -+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ TCGv zero = tcg_constant_tl(0); -+ TCGv mone = tcg_constant_tl(-1); - -- tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); -- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ tcg_gen_ext8u_tl(s->T1, s->T1); - - /* - * Note that since we're using BMILG (in order to get O - * cleared) we need to store the inverse into C. 
- */ -- tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound); -- tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1); -+ tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound); - -- tcg_gen_movi_tl(s->A0, -1); -- tcg_gen_shl_tl(s->A0, s->A0, s->T1); -+ tcg_gen_shl_tl(s->A0, mone, s->T1); -+ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); - tcg_gen_andc_tl(s->T0, s->T0, s->A0); - - gen_op_update1_cc(s); -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 982d4abda4..0244df7987 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -123,6 +123,9 @@ int main(int argc, char *argv[]) { - result = bzhiq(mask, 0x1f); - assert(result == (mask & ~(-1 << 30))); - -+ result = bzhiq(mask, 0x40); -+ assert(result == mask); -+ - result = rorxq(0x2132435465768798, 8); - assert(result == 0x9821324354657687); - --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch b/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch deleted file mode 100644 index 7f3051f..0000000 --- a/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch +++ /dev/null @@ -1,60 +0,0 @@ -From cb2b591e1677db2837810eaedac534a7ff3a7b1c Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 08:06:01 -1000 -Subject: [PATCH 4/8] target/i386: Fix C flag for BLSI, BLSMSK, BLSR - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [4/7] 173e23c492c830da6c5a4be0cfc20a69ac655b59 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -We forgot to set cc_src, which is used for computing C. 
- -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1370 -Signed-off-by: Richard Henderson -Message-Id: <20230114180601.2993644-1-richard.henderson@linaro.org> -Cc: qemu-stable@nongnu.org -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Signed-off-by: Paolo Bonzini -(cherry picked from commit 99282098dc74c2055bde5652bde6cf0067d0c370) ---- - target/i386/tcg/emit.c.inc | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 99f6ba6e19..4d7702c106 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1111,6 +1111,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_neg_tl(s->T1, s->T0); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -@@ -1121,6 +1122,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_xor_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -@@ -1131,6 +1133,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch new file mode 100644 index 0000000..2b1cbc9 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch @@ -0,0 +1,71 @@ +From 0d056d6da9e4147d5965bf3507f6d6d6a413924d Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Wed, 24 May 2023 06:52:43 -0400 +Subject: [PATCH 2/5] target/i386: 
add support for FB_CLEAR feature + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature +RH-Bugzilla: 2216201 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] 5f191964ba25754107a06ef907f4ac614280aaa1 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216201 + +commit 22e1094ca82d5518c1b69aff3e87c550776ae1eb +Author: Emanuele Giuseppe Esposito +Date: Wed Feb 1 08:57:59 2023 -0500 + + target/i386: add support for FB_CLEAR feature + + As reported by the Intel's doc: + "FB_CLEAR: The processor will overwrite fill buffer values as part of + MD_CLEAR operations with the VERW instruction. + On these processors, L1D_FLUSH does not overwrite fill buffer values." + + If this cpu feature is present in host, allow QEMU to choose whether to + show it to the guest too. + One disadvantage of not exposing it is that the guest will report + a non existing vulnerability in + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + because the mitigation is present only when the cpu has + (FLUSH_L1D and MD_CLEAR) or FB_CLEAR + features enabled. 
+ + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20230201135759.555607-3-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index caf6338cc0..839706b430 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1012,7 +1012,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", + "taa-no", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 74fa649b60..c28b9df217 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -989,6 +989,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) + #define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) + #define MSR_ARCH_CAP_TAA_NO (1U << 8) ++#define MSR_ARCH_CAP_FB_CLEAR (1U << 17) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch new file mode 100644 index 0000000..39f2542 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch @@ -0,0 +1,70 @@ +From 14eae569030805680570d93412100ad26242c7e6 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Wed, 24 May 2023 06:52:34 -0400 +Subject: [PATCH 1/5] target/i386: add support for FLUSH_L1D feature + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature +RH-Bugzilla: 2216201 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] e296c75c5cd7e1d16d3c70483d52aeba9f9eb2cd (eesposit/qemu-kvm) + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2216201 + +commit 0e7e3bf1a552c178924867fa7c2f30ccc8a179e0 +Author: Emanuele Giuseppe Esposito +Date: Wed Feb 1 08:57:58 2023 -0500 + + target/i386: add support for FLUSH_L1D feature + + As reported by Intel's doc: + "L1D_FLUSH: Writeback and invalidate the L1 data cache" + + If this cpu feature is present in host, allow QEMU to choose whether to + show it to the guest too. + One disadvantage of not exposing it is that the guest will report + a non existing vulnerability in + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + because the mitigation is present only when the cpu has + (FLUSH_L1D and MD_CLEAR) or FB_CLEAR + features enabled. + + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20230201135759.555607-2-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0ef2bf1b93..caf6338cc0 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -860,7 +860,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "tsx-ldtrk", NULL, NULL /* pconfig */, "arch-lbr", + NULL, NULL, "amx-bf16", "avx512-fp16", + "amx-tile", "amx-int8", "spec-ctrl", "stibp", +- NULL, "arch-capabilities", "core-capability", "ssbd", ++ "flush-l1d", "arch-capabilities", "core-capability", "ssbd", + }, + .cpuid = { + .eax = 7, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index d243e290d3..74fa649b60 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -896,6 +896,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) + /* Single Thread Indirect Branch Predictors */ + #define CPUID_7_0_EDX_STIBP (1U << 27) ++/* Flush L1D cache */ ++#define CPUID_7_0_EDX_FLUSH_L1D (1U << 28) + /* Arch Capabilities */ + #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) 
+ /* Core Capability */ +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch b/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch new file mode 100644 index 0000000..2c81c72 --- /dev/null +++ b/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch @@ -0,0 +1,116 @@ +From 457e74c076e0fe7b64631dfd4369d167f0762c9a Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:22:41 -0400 +Subject: [PATCH 1/7] target/i386: allow versioned CPUs to specify new + cache_info + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/7] 6070e07a4bb070d1c15a811b2bd3195929c18d61 (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit cca0a000d06f897411a8af4402e5d0522bbe450b +Author: Michael Roth +Date: Thu May 4 15:53:06 2023 -0500 + + target/i386: allow versioned CPUs to specify new cache_info + + New EPYC CPUs versions require small changes to their cache_info's. + Because current QEMU x86 CPU definition does not support versioned + cach_info, we would have to declare a new CPU type for each such case. + To avoid the dup work, add "cache_info" in X86CPUVersionDefinition", + to allow new cache_info pointers to be specified for a new CPU version. + + Co-developed-by: Wei Huang + Signed-off-by: Wei Huang + Signed-off-by: Michael Roth + Signed-off-by: Babu Moger + Acked-by: Michael S. 
Tsirkin + Message-Id: <20230504205313.225073-2-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 35 ++++++++++++++++++++++++++++++++--- + 1 file changed, 32 insertions(+), 3 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 4ac3046313..3558c92ed0 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1598,6 +1598,7 @@ typedef struct X86CPUVersionDefinition { + const char *alias; + const char *note; + PropValue *props; ++ const CPUCaches *const cache_info; + } X86CPUVersionDefinition; + + /* Base definition for a CPU model */ +@@ -5213,6 +5214,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) + assert(vdef->version == version); + } + ++static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, ++ X86CPUModel *model) ++{ ++ const X86CPUVersionDefinition *vdef; ++ X86CPUVersion version = x86_cpu_model_resolve_version(model); ++ const CPUCaches *cache_info = model->cpudef->cache_info; ++ ++ if (version == CPU_VERSION_LEGACY) { ++ return cache_info; ++ } ++ ++ for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { ++ if (vdef->cache_info) { ++ cache_info = vdef->cache_info; ++ } ++ ++ if (vdef->version == version) { ++ break; ++ } ++ } ++ ++ assert(vdef->version == version); ++ return cache_info; ++} ++ + /* + * Load data from X86CPUDefinition into a X86CPU object. + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
+@@ -5245,7 +5271,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) + } + + /* legacy-cache defaults to 'off' if CPU model provides cache info */ +- cpu->legacy_cache = !def->cache_info; ++ cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); + + env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; + +@@ -6724,14 +6750,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + + /* Cache information initialization */ + if (!cpu->legacy_cache) { +- if (!xcc->model || !xcc->model->cpudef->cache_info) { ++ const CPUCaches *cache_info = ++ x86_cpu_get_versioned_cache_info(cpu, xcc->model); ++ ++ if (!xcc->model || !cache_info) { + g_autofree char *name = x86_cpu_class_get_model_name(xcc); + error_setg(errp, + "CPU model '%s' doesn't support legacy-cache=off", name); + return; + } + env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = +- *xcc->model->cpudef->cache_info; ++ *cache_info; + } else { + /* Build legacy cache information */ + env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch b/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch deleted file mode 100644 index 72ae8ee..0000000 --- a/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 54d3e58aabf9716f9a07aeb7044d7b7997e28123 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 31 Jan 2023 09:48:03 +0100 -Subject: [PATCH 5/8] target/i386: fix ADOX followed by ADCX - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [5/7] 64dbe4e602f08e4a88fdeacee5a8993ca4383563 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -When ADCX is followed by ADOX or vice versa, the second 
instruction's -carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX -operation. Retrieving the carry from EFLAGS is handled by this bit -of gen_ADCOX: - - tcg_gen_extract_tl(carry_in, cpu_cc_src, - ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1); - -Unfortunately, in this case cc_op has been overwritten by the previous -"if" statement to CC_OP_ADCOX. This works by chance when the first -instruction is ADCX; however, if the first instruction is ADOX, -ADCX will incorrectly take its carry from OF instead of CF. - -Fix by moving the computation of the new cc_op at the end of the function. -The included exhaustive test case fails without this patch and passes -afterwards. - -Because ADCX/ADOX need not be invoked through the VEX prefix, this -regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement -0x0f 0x38, add AVX", 2022-10-18). However, the mistake happened a -little earlier, when BMI instructions were rewritten using the new -decoder framework. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471 -Reported-by: Paul Jolly -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -(cherry picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6) ---- - target/i386/tcg/emit.c.inc | 20 +++++---- - tests/tcg/i386/Makefile.target | 6 ++- - tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++ - 3 files changed, 91 insertions(+), 10 deletions(-) - create mode 100644 tests/tcg/i386/test-i386-adcox.c - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 4d7702c106..0d7c6e80ae 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq) - - static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - { -+ int opposite_cc_op; - TCGv carry_in = NULL; - TCGv carry_out = (cc_op == CC_OP_ADCX ? 
cpu_cc_dst : cpu_cc_src2); - TCGv zero; -@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) { - /* Re-use the carry-out from a previous round. */ - carry_in = carry_out; -- cc_op = s->cc_op; -- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) { -- /* Merge with the carry-out from the opposite instruction. */ -- cc_op = CC_OP_ADCOX; -- } -- -- /* If we don't have a carry-in, get it out of EFLAGS. */ -- if (!carry_in) { -+ } else { -+ /* We don't have a carry-in, get it out of EFLAGS. */ - if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { - gen_compute_eflags(s); - } -@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); - break; - } -- set_cc_op(s, cc_op); -+ -+ opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX; -+ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) { -+ /* Merge with the carry-out from the opposite instruction. 
*/ -+ set_cc_op(s, CC_OP_ADCOX); -+ } else { -+ set_cc_op(s, cc_op); -+ } - } - - static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) -diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target -index 81831cafbc..bafd8c2180 100644 ---- a/tests/tcg/i386/Makefile.target -+++ b/tests/tcg/i386/Makefile.target -@@ -14,7 +14,7 @@ config-cc.mak: Makefile - I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c)) - ALL_X86_TESTS=$(I386_SRCS:.c=) - SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx --X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) -+X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) - - test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse - run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max -@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2 - run-test-i386-bmi2: QEMU_OPTS += -cpu max - run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max - -+test-i386-adcox: CFLAGS=-O2 -+run-test-i386-adcox: QEMU_OPTS += -cpu max -+run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max -+ - # - # hello-i386 is a barebones app - # -diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c -new file mode 100644 -index 0000000000..16169efff8 ---- /dev/null -+++ b/tests/tcg/i386/test-i386-adcox.c -@@ -0,0 +1,75 @@ -+/* See if various BMI2 instructions give expected results */ -+#include -+#include -+#include -+ -+#define CC_C 1 -+#define CC_O (1 << 11) -+ -+#ifdef __x86_64__ -+#define REG uint64_t -+#else -+#define REG uint32_t -+#endif -+ -+void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) -+{ -+ REG flags; -+ REG out_adcx, out_adox; -+ -+ asm("pushf; pop %0" : "=r"(flags)); -+ flags &= ~(CC_C | CC_O); -+ flags |= (in_c ? CC_C : 0); -+ flags |= (in_o ? 
CC_O : 0); -+ -+ out_adcx = adcx_operand; -+ out_adox = adox_operand; -+ asm("push %0; popf;" -+ "adox %3, %2;" -+ "adcx %3, %1;" -+ "pushf; pop %0" -+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) -+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); -+ -+ assert(out_adcx == in_c + adcx_operand - 1); -+ assert(out_adox == in_o + adox_operand - 1); -+ assert(!!(flags & CC_C) == (in_c || adcx_operand)); -+ assert(!!(flags & CC_O) == (in_o || adox_operand)); -+} -+ -+void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) -+{ -+ REG flags; -+ REG out_adcx, out_adox; -+ -+ asm("pushf; pop %0" : "=r"(flags)); -+ flags &= ~(CC_C | CC_O); -+ flags |= (in_c ? CC_C : 0); -+ flags |= (in_o ? CC_O : 0); -+ -+ out_adcx = adcx_operand; -+ out_adox = adox_operand; -+ asm("push %0; popf;" -+ "adcx %3, %1;" -+ "adox %3, %2;" -+ "pushf; pop %0" -+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) -+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); -+ -+ assert(out_adcx == in_c + adcx_operand - 1); -+ assert(out_adox == in_o + adox_operand - 1); -+ assert(!!(flags & CC_C) == (in_c || adcx_operand)); -+ assert(!!(flags & CC_O) == (in_o || adox_operand)); -+} -+ -+int main(int argc, char *argv[]) { -+ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */ -+ int i; -+ for (i = 0; i <= 15; i++) { -+ printf("%d\n", i); -+ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); -+ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); -+ } -+ return 0; -+} -+ --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch b/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch deleted file mode 100644 index 81a0003..0000000 --- a/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch +++ /dev/null @@ -1,77 +0,0 @@ -From f4ddcdd2395e0944c20f6683c66068ed0ac7d757 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Sat, 7 Jan 2023 
18:14:20 +0100 -Subject: [PATCH 1/8] target/i386: fix operand size of unary SSE operations - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [1/7] 7041f3e30e19add6bd8e5355d8bebf92390a5c2e (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -VRCPSS, VRSQRTSS and VCVTSx2Sx have a 32-bit or 64-bit memory operand, -which is represented in the decoding tables by X86_VEX_REPScalar. Add it -to the tables, and make validate_vex() handle the case of an instruction -that is in exception type 4 without the REP prefix and exception type 5 -with it; this is the cas of VRCP and VRSQRT. - -Reported-by: yongwoo -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1377 -Signed-off-by: Paolo Bonzini -(cherry picked from commit 3d304620ec6c95f31db17acc132f42f243369299) ---- - target/i386/tcg/decode-new.c.inc | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc -index 80c579164f..d5fd8d965c 100644 ---- a/target/i386/tcg/decode-new.c.inc -+++ b/target/i386/tcg/decode-new.c.inc -@@ -105,6 +105,7 @@ - #define vex3 .vex_class = 3, - #define vex4 .vex_class = 4, - #define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned, -+#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar, - #define vex5 .vex_class = 5, - #define vex6 .vex_class = 6, - #define vex7 .vex_class = 7, -@@ -839,8 +840,8 @@ static const X86OpEntry opcodes_0F[256] = { - - [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66), - [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -- [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), -- [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), -+ [0x52] 
= X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), -+ [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), - [0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */ - [0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */ - [0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */ -@@ -878,7 +879,7 @@ static const X86OpEntry opcodes_0F[256] = { - - [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -- [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex3 p_00_66_f3_f2), -+ [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x5b] = X86_OP_GROUP0(0F5B), - [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -@@ -1447,9 +1448,9 @@ static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) - * Instructions which differ between 00/66 and F2/F3 in the - * exception classification and the size of the memory operand. - */ -- assert(e->vex_class == 1 || e->vex_class == 2); -+ assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4); - if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { -- e->vex_class = 3; -+ e->vex_class = e->vex_class < 4 ? 
3 : 5; - if (s->vex_l) { - goto illegal; - } --- -2.39.1 - diff --git a/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch b/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch deleted file mode 100644 index b9536c3..0000000 --- a/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b330bf0a2ad5af73d3c62997f7f0fa5b61f1796b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 14 Feb 2023 14:48:37 +0100 -Subject: [PATCH 8/8] target/s390x/arch_dump: Fix memory corruption in - s390x_write_elf64_notes() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 152: Fix memory corruption in s390x_write_elf64_notes() -RH-Bugzilla: 2168172 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cédric Le Goater -RH-Commit: [1/1] 37a2c997b2c8b7524e0b6299891bf3ea7c9a46d0 (thuth/qemu-kvm-cs9) - -Bugzilla: https://bugzilla.redhat.com/2168172 -Upstream-Status: Posted (and reviewed, but not merged yet) - -"note_size" can be smaller than sizeof(note), so unconditionally calling -memset(notep, 0, sizeof(note)) could cause a memory corruption here in -case notep has been allocated dynamically, thus let's use note_size as -length argument for memset() instead. 
- -Fixes: 113d8f4e95 ("s390x: pv: Add dump support") -Message-Id: <20230214141056.680969-1-thuth@redhat.com> -Reviewed-by: Janosch Frank -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth ---- - target/s390x/arch_dump.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c -index a2329141e8..a7c44ba49d 100644 ---- a/target/s390x/arch_dump.c -+++ b/target/s390x/arch_dump.c -@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name, - notep = g_malloc(note_size); - } - -- memset(notep, 0, sizeof(note)); -+ memset(notep, 0, note_size); - - /* Setup note header data */ - notep->hdr.n_descsz = cpu_to_be32(content_size); --- -2.31.1 - diff --git a/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch b/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch deleted file mode 100644 index 268c263..0000000 --- a/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 093c4a6834f3ec5a05390a3630ae4edec80885b8 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:57 +0100 -Subject: [PATCH 15/31] test-bdrv-drain: Don't yield in - .bdrv_co_drained_begin/end() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [3/16] 5282d3e13cb85dfb480edb11b7eb2769248465df (sgarzarella/qemu-kvm-c-9-s) - -We want to change .bdrv_co_drained_begin/end() back to be non-coroutine -callbacks, so in preparation, avoid yielding in their implementation. - -This does almost the same as the existing logic in bdrv_drain_invoke(), -by creating and entering coroutines internally. 
However, since the test -case is by far the heaviest user of coroutine code in drain callbacks, -it is preferable to have the complexity in the test case rather than the -drain core, which is already complicated enough without this. - -The behaviour for bdrv_drain_begin() is unchanged because we increase -bs->in_flight and this is still polled. However, bdrv_drain_end() -doesn't wait for the spawned coroutine to complete any more. This is -fine, we don't rely on bdrv_drain_end() restarting all operations -immediately before the next aio_poll(). - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7bce1c299834557bffd92294608ea528648cfe75) -Signed-off-by: Stefano Garzarella ---- - tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++---------- - 1 file changed, 46 insertions(+), 18 deletions(-) - -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 09dc4a4891..24f34e24ad 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -38,12 +38,22 @@ typedef struct BDRVTestState { - bool sleep_in_drain_begin; - } BDRVTestState; - -+static void coroutine_fn sleep_in_drain_begin(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ -+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); -+ bdrv_dec_in_flight(bs); -+} -+ - static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count++; - if (s->sleep_in_drain_begin) { -- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); -+ Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - -@@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, - return 0; - } - -+static void coroutine_fn 
bdrv_replace_test_drain_co(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ BDRVReplaceTestState *s = bs->opaque; -+ -+ /* Keep waking io_co up until it is done */ -+ while (s->io_co) { -+ aio_co_wake(s->io_co); -+ s->io_co = NULL; -+ qemu_coroutine_yield(); -+ } -+ s->drain_co = NULL; -+ bdrv_dec_in_flight(bs); -+} -+ - /** - * If .drain_count is 0, wake up .io_co if there is one; and set - * .was_drained. -@@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) - BDRVReplaceTestState *s = bs->opaque; - - if (!s->drain_count) { -- /* Keep waking io_co up until it is done */ -- s->drain_co = qemu_coroutine_self(); -- while (s->io_co) { -- aio_co_wake(s->io_co); -- s->io_co = NULL; -- qemu_coroutine_yield(); -- } -- s->drain_co = NULL; -- -+ s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), s->drain_co); - s->was_drained = true; - } - s->drain_count++; - } - -+static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ char data; -+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); -+ int ret; -+ -+ /* Queue a read request post-drain */ -+ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); -+ g_assert(ret >= 0); -+ bdrv_dec_in_flight(bs); -+} -+ - /** - * Reduce .drain_count, set .was_undrained once it reaches 0. 
- * If .drain_count reaches 0 and the node has a backing file, issue a -@@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) - - g_assert(s->drain_count > 0); - if (!--s->drain_count) { -- int ret; -- - s->was_undrained = true; - - if (bs->backing) { -- char data; -- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); -- -- /* Queue a read request post-drain */ -- ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); -- g_assert(ret >= 0); -+ Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry, -+ bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - } --- -2.31.1 - diff --git a/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch b/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch deleted file mode 100644 index ebd52cd..0000000 --- a/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch +++ /dev/null @@ -1,505 +0,0 @@ -From 39d5761fe1f546e764dedf2ea32c55d8f5222696 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 18 Jan 2023 13:04:05 +0100 -Subject: [PATCH 1/8] tests/qtest: netdev: test stream and dgram backends -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect -RH-Bugzilla: 2169232 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Cindy Lu -RH-Acked-by: MST -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [1/2] 75c71b47eea072e14651a96612d402b50d2b8f1e (lvivier/qemu-kvm-centos) - -Signed-off-by: Laurent Vivier -Acked-by: Michael S. 
Tsirkin -Message-Id: <20230118120405.1876329-1-lvivier@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit c95031a19f0d7f418a597243f6f84b031a858997) ---- - tests/qtest/meson.build | 2 + - tests/qtest/netdev-socket.c | 448 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 450 insertions(+) - create mode 100644 tests/qtest/netdev-socket.c - -diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 9df3f9f8b9..2e7c6fe5e3 100644 ---- a/tests/qtest/meson.build -+++ b/tests/qtest/meson.build -@@ -27,6 +27,7 @@ qtests_generic = [ - 'test-hmp', - 'qos-test', - 'readconfig-test', -+ 'netdev-socket', - ] - if config_host.has_key('CONFIG_MODULES') - qtests_generic += [ 'modules-test' ] -@@ -299,6 +300,7 @@ qtests = { - 'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'], - 'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'], - 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), -+ 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'), - } - - gvnc = dependency('gvnc-1.0', required: false) -diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c -new file mode 100644 -index 0000000000..6ba256e173 ---- /dev/null -+++ b/tests/qtest/netdev-socket.c -@@ -0,0 +1,448 @@ -+/* -+ * QTest testcase for netdev stream and dgram -+ * -+ * Copyright (c) 2022 Red Hat, Inc. 
-+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/sockets.h" -+#include -+#include "../unit/socket-helpers.h" -+#include "libqtest.h" -+ -+#define CONNECTION_TIMEOUT 5 -+ -+#define EXPECT_STATE(q, e, t) \ -+do { \ -+ char *resp = NULL; \ -+ g_test_timer_start(); \ -+ do { \ -+ g_free(resp); \ -+ resp = qtest_hmp(q, "info network"); \ -+ if (t) { \ -+ strrchr(resp, t)[0] = 0; \ -+ } \ -+ if (g_str_equal(resp, e)) { \ -+ break; \ -+ } \ -+ } while (g_test_timer_elapsed() < CONNECTION_TIMEOUT); \ -+ g_assert_cmpstr(resp, ==, e); \ -+ g_free(resp); \ -+} while (0) -+ -+static gchar *tmpdir; -+ -+static int inet_get_free_port_socket_ipv4(int sock) -+{ -+ struct sockaddr_in addr; -+ socklen_t len; -+ -+ memset(&addr, 0, sizeof(addr)); -+ addr.sin_family = AF_INET; -+ addr.sin_addr.s_addr = INADDR_ANY; -+ addr.sin_port = 0; -+ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { -+ return -1; -+ } -+ -+ len = sizeof(addr); -+ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { -+ return -1; -+ } -+ -+ return ntohs(addr.sin_port); -+} -+ -+static int inet_get_free_port_socket_ipv6(int sock) -+{ -+ struct sockaddr_in6 addr; -+ socklen_t len; -+ -+ memset(&addr, 0, sizeof(addr)); -+ addr.sin6_family = AF_INET6; -+ addr.sin6_addr = in6addr_any; -+ addr.sin6_port = 0; -+ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { -+ return -1; -+ } -+ -+ len = sizeof(addr); -+ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { -+ return -1; -+ } -+ -+ return ntohs(addr.sin6_port); -+} -+ -+static int inet_get_free_port_multiple(int nb, int *port, bool ipv6) -+{ -+ int sock[nb]; -+ int i; -+ -+ for (i = 0; i < nb; i++) { -+ sock[i] = socket(ipv6 ? AF_INET6 : AF_INET, SOCK_STREAM, 0); -+ if (sock[i] < 0) { -+ break; -+ } -+ port[i] = ipv6 ? 
inet_get_free_port_socket_ipv6(sock[i]) : -+ inet_get_free_port_socket_ipv4(sock[i]); -+ if (port[i] == -1) { -+ break; -+ } -+ } -+ -+ nb = i; -+ for (i = 0; i < nb; i++) { -+ closesocket(sock[i]); -+ } -+ -+ return nb; -+} -+ -+static int inet_get_free_port(bool ipv6) -+{ -+ int nb, port; -+ -+ nb = inet_get_free_port_multiple(1, &port, ipv6); -+ g_assert_cmpint(nb, ==, 1); -+ -+ return port; -+} -+ -+static void test_stream_inet_ipv4(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port; -+ -+ port = inet_get_free_port(false); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,tcp:127.0.0.1:%d\r\n", -+ port); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ /* the port is unknown, check only the address */ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:127.0.0.1", ':'); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_stream_inet_ipv6(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port; -+ -+ port = inet_get_free_port(true); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=off,addr.ipv6=on," -+ "addr.host=::1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=off,addr.ipv6=on," -+ "addr.host=::1,addr.port=%d", port); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,tcp:::1:%d\r\n", -+ port); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ /* the 
port is unknown, check only the address */ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:::1", ':'); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_stream_unix(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path; -+ -+ path = g_strconcat(tmpdir, "/stream_unix", NULL); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true," -+ "addr.type=unix,addr.path=%s,", -+ path); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=false," -+ "addr.type=unix,addr.path=%s", -+ path); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); -+ EXPECT_STATE(qts1, expect, 0); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ g_free(path); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+#ifdef CONFIG_LINUX -+static void test_stream_unix_abstract(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path; -+ -+ path = g_strconcat(tmpdir, "/stream_unix_abstract", NULL); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true," -+ "addr.type=unix,addr.path=%s," -+ "addr.abstract=on", -+ path); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=false," -+ "addr.type=unix,addr.path=%s,addr.abstract=on", -+ path); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); -+ EXPECT_STATE(qts1, expect, 0); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ g_free(path); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+#endif -+ -+#ifndef _WIN32 -+static void test_stream_fd(void) -+{ -+ QTestState *qts0, *qts1; -+ int sock[2]; -+ int ret; -+ -+ ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, sock); -+ g_assert_true(ret == 0); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", -+ sock[0]); -+ 
-+ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", -+ sock[1]); -+ -+ EXPECT_STATE(qts1, "st0: index=0,type=stream,unix:\r\n", 0); -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+ -+ closesocket(sock[0]); -+ closesocket(sock[1]); -+} -+#endif -+ -+static void test_dgram_inet(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port[2]; -+ int nb; -+ -+ nb = inet_get_free_port_multiple(2, port, false); -+ g_assert_cmpint(nb, ==, 2); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "local.type=inet,local.host=127.0.0.1,local.port=%d," -+ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", -+ port[0], port[1]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram," -+ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", -+ port[0], port[1]); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "local.type=inet,local.host=127.0.0.1,local.port=%d," -+ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", -+ port[1], port[0]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram," -+ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", -+ port[1], port[0]); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+#ifndef _WIN32 -+static void test_dgram_mcast(void) -+{ -+ QTestState *qts; -+ -+ qts = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "remote.type=inet,remote.host=230.0.0.1,remote.port=1234"); -+ -+ EXPECT_STATE(qts, "st0: index=0,type=dgram,mcast=230.0.0.1:1234\r\n", 0); -+ -+ qtest_quit(qts); -+} -+ -+static void test_dgram_unix(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path0, *path1; -+ -+ path0 = g_strconcat(tmpdir, "/dgram_unix0", NULL); -+ path1 = g_strconcat(tmpdir, "/dgram_unix1", NULL); -+ -+ 
qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=unix,local.path=%s," -+ "remote.type=unix,remote.path=%s", -+ path0, path1); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", -+ path0, path1); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=unix,local.path=%s," -+ "remote.type=unix,remote.path=%s", -+ path1, path0); -+ -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", -+ path1, path0); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ unlink(path0); -+ g_free(path0); -+ unlink(path1); -+ g_free(path1); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_dgram_fd(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int ret; -+ int sv[2]; -+ -+ ret = socketpair(PF_UNIX, SOCK_DGRAM, 0, sv); -+ g_assert_cmpint(ret, !=, -1); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=fd,local.str=%d", -+ sv[0]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[0]); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=fd,local.str=%d", -+ sv[1]); -+ -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[1]); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+ -+ closesocket(sv[0]); -+ closesocket(sv[1]); -+} -+#endif -+ -+int main(int argc, char **argv) -+{ -+ int ret; -+ bool has_ipv4, has_ipv6, has_afunix; -+ g_autoptr(GError) err = NULL; -+ -+ socket_init(); -+ g_test_init(&argc, &argv, NULL); -+ -+ if (socket_check_protocol_support(&has_ipv4, &has_ipv6) < 0) { -+ g_error("socket_check_protocol_support() failed\n"); -+ } -+ -+ tmpdir = g_dir_make_tmp("netdev-socket.XXXXXX", &err); -+ if (tmpdir == NULL) { -+ g_error("Can't create temporary directory in %s: %s", -+ 
g_get_tmp_dir(), err->message); -+ } -+ -+ if (has_ipv4) { -+ qtest_add_func("/netdev/stream/inet/ipv4", test_stream_inet_ipv4); -+ qtest_add_func("/netdev/dgram/inet", test_dgram_inet); -+#ifndef _WIN32 -+ qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); -+#endif -+ } -+ if (has_ipv6) { -+ qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); -+ } -+ -+ socket_check_afunix_support(&has_afunix); -+ if (has_afunix) { -+#ifndef _WIN32 -+ qtest_add_func("/netdev/dgram/unix", test_dgram_unix); -+#endif -+ qtest_add_func("/netdev/stream/unix", test_stream_unix); -+#ifdef CONFIG_LINUX -+ qtest_add_func("/netdev/stream/unix/abstract", -+ test_stream_unix_abstract); -+#endif -+#ifndef _WIN32 -+ qtest_add_func("/netdev/stream/fd", test_stream_fd); -+ qtest_add_func("/netdev/dgram/fd", test_dgram_fd); -+#endif -+ } -+ -+ ret = g_test_run(); -+ -+ g_rmdir(tmpdir); -+ g_free(tmpdir); -+ -+ return ret; -+} --- -2.31.1 - diff --git a/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch b/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch deleted file mode 100644 index 14388fe..0000000 --- a/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch +++ /dev/null @@ -1,299 +0,0 @@ -From 120db3dfeb88c447f0e115c19b7ede704f8f80cb Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:05:41 -1000 -Subject: [PATCH 2/8] tests/tcg/i386: Introduce and use reg_t consistently -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [2/7] 843a677555414170392db21c828bef3dc3c29300 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -Define reg_t based on the actual register 
width. -Define the inlines using that type. This will allow -input registers to 32-bit insns to be set to 64-bit -values on x86-64, which allows testing various edge cases. - -Signed-off-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20230114230542.3116013-2-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 5d62d6649cd367b5b4a3676e7514d2f9ca86cb03) ---- - tests/tcg/i386/test-i386-bmi2.c | 182 ++++++++++++++++---------------- - 1 file changed, 93 insertions(+), 89 deletions(-) - -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 5fadf47510..3c3ef85513 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -3,34 +3,40 @@ - #include - #include - -+#ifdef __x86_64 -+typedef uint64_t reg_t; -+#else -+typedef uint32_t reg_t; -+#endif -+ - #define insn1q(name, arg0) \ --static inline uint64_t name##q(uint64_t arg0) \ -+static inline reg_t name##q(reg_t arg0) \ - { \ -- uint64_t result64; \ -+ reg_t result64; \ - asm volatile (#name "q %1, %0" : "=r"(result64) : "rm"(arg0)); \ - return result64; \ - } - - #define insn1l(name, arg0) \ --static inline uint32_t name##l(uint32_t arg0) \ -+static inline reg_t name##l(reg_t arg0) \ - { \ -- uint32_t result32; \ -+ reg_t result32; \ - asm volatile (#name "l %k1, %k0" : "=r"(result32) : "rm"(arg0)); \ - return result32; \ - } - - #define insn2q(name, arg0, c0, arg1, c1) \ --static inline uint64_t name##q(uint64_t arg0, uint64_t arg1) \ -+static inline reg_t name##q(reg_t arg0, reg_t arg1) \ - { \ -- uint64_t result64; \ -+ reg_t result64; \ - asm volatile (#name "q %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1)); \ - return result64; \ - } - - #define insn2l(name, arg0, c0, arg1, c1) \ --static inline uint32_t name##l(uint32_t arg0, uint32_t arg1) \ -+static inline reg_t name##l(reg_t arg0, reg_t arg1) \ - { \ -- uint32_t result32; \ -+ reg_t result32; \ - asm volatile (#name "l %k2, %k1, %k0" : 
"=r"(result32) : c0(arg0), c1(arg1)); \ - return result32; \ - } -@@ -65,130 +71,128 @@ insn1l(blsr, src) - int main(int argc, char *argv[]) { - uint64_t ehlo = 0x202020204f4c4845ull; - uint64_t mask = 0xa080800302020001ull; -- uint32_t result32; -+ reg_t result; - - #ifdef __x86_64 -- uint64_t result64; -- - /* 64 bits */ -- result64 = andnq(mask, ehlo); -- assert(result64 == 0x002020204d4c4844); -+ result = andnq(mask, ehlo); -+ assert(result == 0x002020204d4c4844); - -- result64 = pextq(ehlo, mask); -- assert(result64 == 133); -+ result = pextq(ehlo, mask); -+ assert(result == 133); - -- result64 = pdepq(result64, mask); -- assert(result64 == (ehlo & mask)); -+ result = pdepq(result, mask); -+ assert(result == (ehlo & mask)); - -- result64 = pextq(-1ull, mask); -- assert(result64 == 511); /* mask has 9 bits set */ -+ result = pextq(-1ull, mask); -+ assert(result == 511); /* mask has 9 bits set */ - -- result64 = pdepq(-1ull, mask); -- assert(result64 == mask); -+ result = pdepq(-1ull, mask); -+ assert(result == mask); - -- result64 = bextrq(mask, 0x3f00); -- assert(result64 == (mask & ~INT64_MIN)); -+ result = bextrq(mask, 0x3f00); -+ assert(result == (mask & ~INT64_MIN)); - -- result64 = bextrq(mask, 0x1038); -- assert(result64 == 0xa0); -+ result = bextrq(mask, 0x1038); -+ assert(result == 0xa0); - -- result64 = bextrq(mask, 0x10f8); -- assert(result64 == 0); -+ result = bextrq(mask, 0x10f8); -+ assert(result == 0); - -- result64 = blsiq(0x30); -- assert(result64 == 0x10); -+ result = blsiq(0x30); -+ assert(result == 0x10); - -- result64 = blsiq(0x30ull << 32); -- assert(result64 == 0x10ull << 32); -+ result = blsiq(0x30ull << 32); -+ assert(result == 0x10ull << 32); - -- result64 = blsmskq(0x30); -- assert(result64 == 0x1f); -+ result = blsmskq(0x30); -+ assert(result == 0x1f); - -- result64 = blsrq(0x30); -- assert(result64 == 0x20); -+ result = blsrq(0x30); -+ assert(result == 0x20); - -- result64 = blsrq(0x30ull << 32); -- assert(result64 == 0x20ull << 
32); -+ result = blsrq(0x30ull << 32); -+ assert(result == 0x20ull << 32); - -- result64 = bzhiq(mask, 0x3f); -- assert(result64 == (mask & ~INT64_MIN)); -+ result = bzhiq(mask, 0x3f); -+ assert(result == (mask & ~INT64_MIN)); - -- result64 = bzhiq(mask, 0x1f); -- assert(result64 == (mask & ~(-1 << 30))); -+ result = bzhiq(mask, 0x1f); -+ assert(result == (mask & ~(-1 << 30))); - -- result64 = rorxq(0x2132435465768798, 8); -- assert(result64 == 0x9821324354657687); -+ result = rorxq(0x2132435465768798, 8); -+ assert(result == 0x9821324354657687); - -- result64 = sarxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0xffffeeddccbbaa99); -+ result = sarxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0xffffeeddccbbaa99); - -- result64 = sarxq(0x77eeddccbbaa9988, 8 | 64); -- assert(result64 == 0x0077eeddccbbaa99); -+ result = sarxq(0x77eeddccbbaa9988, 8 | 64); -+ assert(result == 0x0077eeddccbbaa99); - -- result64 = shrxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0x00ffeeddccbbaa99); -+ result = shrxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0x00ffeeddccbbaa99); - -- result64 = shrxq(0x77eeddccbbaa9988, 8 | 192); -- assert(result64 == 0x0077eeddccbbaa99); -+ result = shrxq(0x77eeddccbbaa9988, 8 | 192); -+ assert(result == 0x0077eeddccbbaa99); - -- result64 = shlxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0xeeddccbbaa998800); -+ result = shlxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0xeeddccbbaa998800); - #endif - - /* 32 bits */ -- result32 = andnl(mask, ehlo); -- assert(result32 == 0x04d4c4844); -+ result = andnl(mask, ehlo); -+ assert(result == 0x04d4c4844); - -- result32 = pextl((uint32_t) ehlo, mask); -- assert(result32 == 5); -+ result = pextl((uint32_t) ehlo, mask); -+ assert(result == 5); - -- result32 = pdepl(result32, mask); -- assert(result32 == (uint32_t)(ehlo & mask)); -+ result = pdepl(result, mask); -+ assert(result == (uint32_t)(ehlo & mask)); - -- result32 = pextl(-1u, mask); -- assert(result32 == 7); /* mask has 3 bits set */ -+ result = 
pextl(-1u, mask); -+ assert(result == 7); /* mask has 3 bits set */ - -- result32 = pdepl(-1u, mask); -- assert(result32 == (uint32_t)mask); -+ result = pdepl(-1u, mask); -+ assert(result == (uint32_t)mask); - -- result32 = bextrl(mask, 0x1f00); -- assert(result32 == (mask & ~INT32_MIN)); -+ result = bextrl(mask, 0x1f00); -+ assert(result == (mask & ~INT32_MIN)); - -- result32 = bextrl(ehlo, 0x1018); -- assert(result32 == 0x4f); -+ result = bextrl(ehlo, 0x1018); -+ assert(result == 0x4f); - -- result32 = bextrl(mask, 0x1038); -- assert(result32 == 0); -+ result = bextrl(mask, 0x1038); -+ assert(result == 0); - -- result32 = blsil(0xffff); -- assert(result32 == 1); -+ result = blsil(0xffff); -+ assert(result == 1); - -- result32 = blsmskl(0x300); -- assert(result32 == 0x1ff); -+ result = blsmskl(0x300); -+ assert(result == 0x1ff); - -- result32 = blsrl(0xffc); -- assert(result32 == 0xff8); -+ result = blsrl(0xffc); -+ assert(result == 0xff8); - -- result32 = bzhil(mask, 0xf); -- assert(result32 == 1); -+ result = bzhil(mask, 0xf); -+ assert(result == 1); - -- result32 = rorxl(0x65768798, 8); -- assert(result32 == 0x98657687); -+ result = rorxl(0x65768798, 8); -+ assert(result == 0x98657687); - -- result32 = sarxl(0xffeeddcc, 8); -- assert(result32 == 0xffffeedd); -+ result = sarxl(0xffeeddcc, 8); -+ assert(result == 0xffffeedd); - -- result32 = sarxl(0x77eeddcc, 8 | 32); -- assert(result32 == 0x0077eedd); -+ result = sarxl(0x77eeddcc, 8 | 32); -+ assert(result == 0x0077eedd); - -- result32 = shrxl(0xffeeddcc, 8); -- assert(result32 == 0x00ffeedd); -+ result = shrxl(0xffeeddcc, 8); -+ assert(result == 0x00ffeedd); - -- result32 = shrxl(0x77eeddcc, 8 | 128); -- assert(result32 == 0x0077eedd); -+ result = shrxl(0x77eeddcc, 8 | 128); -+ assert(result == 0x0077eedd); - -- result32 = shlxl(0xffeeddcc, 8); -- assert(result32 == 0xeeddcc00); -+ result = shlxl(0xffeeddcc, 8); -+ assert(result == 0xeeddcc00); - - return 0; - } --- -2.39.1 - diff --git 
a/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch b/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch new file mode 100644 index 0000000..ef99b30 --- /dev/null +++ b/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch @@ -0,0 +1,88 @@ +From b998f8474846886fa1e0428fe79fe2a79231cc05 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 12 May 2023 15:43:38 +0100 +Subject: [PATCH 35/37] ui: Fix pixel colour channel order for PNG screenshots +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 183: ui: Fix pixel colour channel order for PNG screenshots +RH-Bugzilla: 2222579 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 76acd3c5526639e70bc2998f584503c78fc9bc56 (marcandre.lureau-rh/qemu-kvm-centos) + +When we take a PNG screenshot the ordering of the colour channels in +the data is not correct, resulting in the image having weird +colouring compared to the actual display. (Specifically, on a +little-endian host the blue and red channels are swapped; on +big-endian everything is wrong.) + +This happens because the pixman idea of the pixel data and the libpng +idea differ. PIXMAN_a8r8g8b8 defines that pixels are 32-bit values, +with A in bits 24-31, R in bits 16-23, G in bits 8-15 and B in bits +0-7. This means that on little-endian systems the bytes in memory +are + B G R A +and on big-endian systems they are + A R G B + +libpng, on the other hand, thinks of pixels as being a series of +values for each channel, so its format PNG_COLOR_TYPE_RGB_ALPHA +always wants bytes in the order + R G B A + +This isn't the same as the pixman order for either big or little +endian hosts. + +The alpha channel is also unnecessary bulk in the output PNG file, +because there is no alpha information in a screenshot. 
+ +To handle the endianness issue, we already define in ui/qemu-pixman.h +various PIXMAN_BE_* and PIXMAN_LE_* values that give consistent +byte-order pixel channel formats. So we can use PIXMAN_BE_r8g8b8 and +PNG_COLOR_TYPE_RGB, which both have an in-memory byte order of + R G B +and 3 bytes per pixel. + +(PPM format screenshots get this right; they already use the +PIXMAN_BE_r8g8b8 format.) + +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1622 +Fixes: 9a0a119a382867 ("Added parameter to take screenshot with screendump as PNG") +Signed-off-by: Peter Maydell +Reviewed-by: Marc-André Lureau +Message-id: 20230502135548.2451309-1-peter.maydell@linaro.org + +(cherry picked from commit cd22a0f520f471e3bd33bc19cf3b2fa772cdb2a8) +Signed-off-by: Marc-André Lureau +--- + ui/console.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/ui/console.c b/ui/console.c +index 6e8a3cdc62..e173731e20 100644 +--- a/ui/console.c ++++ b/ui/console.c +@@ -311,7 +311,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) + png_struct *png_ptr; + png_info *info_ptr; + g_autoptr(pixman_image_t) linebuf = +- qemu_pixman_linebuf_create(PIXMAN_a8r8g8b8, width); ++ qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width); + uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf); + FILE *f = fdopen(fd, "wb"); + int y; +@@ -341,7 +341,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) + png_init_io(png_ptr, f); + + png_set_IHDR(png_ptr, info_ptr, width, height, 8, +- PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, ++ PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + + png_write_info(png_ptr, info_ptr); +-- +2.39.3 + diff --git a/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch b/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch new file mode 100644 index 0000000..8c468d8 --- /dev/null +++ 
b/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch @@ -0,0 +1,180 @@ +From c1502b0cd16378d6d5bd4259b90bf81a5fb5aad3 Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Fri, 5 May 2023 14:00:51 +0200 +Subject: [PATCH 20/21] util/async-teardown: wire up query-command-line-options +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests +RH-Bugzilla: 2168500 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/2] 76e5f25df2c02721f5a29f552ee3061be589abb2 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 + +Add new -run-with option with an async-teardown=on|off parameter. It is +visible in the output of query-command-line-options QMP command, so it +can be discovered and used by libvirt. + +The option -async-teardown is now redundant, deprecate it. + +Reported-by: Boris Fiuczynski +Fixes: c891c24b1a ("os-posix: asynchronous teardown for shutdown on Linux") +Signed-off-by: Claudio Imbrenda +Message-Id: <20230505120051.36605-2-imbrenda@linux.ibm.com> +[thuth: Add curly braces to fix error with GCC 8.5, fix bug in deprecated.rst] +Signed-off-by: Thomas Huth + +(cherry picked from commit 80bd81cadd127c1e2fc784612a52abe392670ba4) +Conflicts: + docs/about/deprecated.rst (missing context from other patches) +Signed-off-by: Thomas Huth +--- + docs/about/deprecated.rst | 5 +++++ + os-posix.c | 14 ++++++++++++++ + qemu-options.hx | 34 +++++++++++++++++++++++----------- + util/async-teardown.c | 21 +++++++++++++++++++++ + 4 files changed, 63 insertions(+), 11 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index 1ca9dc33d6..52893fcf38 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -111,6 +111,11 @@ Use ``-machine acpi=off`` instead. 
+ The HAXM project has been retired (see https://github.com/intel/haxm#status). + Use "whpx" (on Windows) or "hvf" (on macOS) instead. + ++``-async-teardown`` (since 8.1) ++''''''''''''''''''''''''''''''' ++ ++Use ``-run-with async-teardown=on`` instead. ++ + + QEMU Machine Protocol (QMP) commands + ------------------------------------ +diff --git a/os-posix.c b/os-posix.c +index 5adc69f560..90ea71725f 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -36,6 +36,8 @@ + #include "qemu/log.h" + #include "sysemu/runstate.h" + #include "qemu/cutils.h" ++#include "qemu/config-file.h" ++#include "qemu/option.h" + + #ifdef CONFIG_LINUX + #include +@@ -152,9 +154,21 @@ int os_parse_cmd_args(int index, const char *optarg) + daemonize = 1; + break; + #if defined(CONFIG_LINUX) ++ /* deprecated */ + case QEMU_OPTION_asyncteardown: + init_async_teardown(); + break; ++ case QEMU_OPTION_run_with: { ++ QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), ++ optarg, false); ++ if (!opts) { ++ exit(1); ++ } ++ if (qemu_opt_get_bool(opts, "async-teardown", false)) { ++ init_async_teardown(); ++ } ++ break; ++ } + #endif + default: + return -1; +diff --git a/qemu-options.hx b/qemu-options.hx +index 52b49f1f6a..b18f933703 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -4766,20 +4766,32 @@ DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL) + DEF("async-teardown", 0, QEMU_OPTION_asyncteardown, + "-async-teardown enable asynchronous teardown\n", + QEMU_ARCH_ALL) +-#endif + SRST + ``-async-teardown`` +- Enable asynchronous teardown. A new process called "cleanup/" +- will be created at startup sharing the address space with the main qemu +- process, using clone. It will wait for the main qemu process to +- terminate completely, and then exit. +- This allows qemu to terminate very quickly even if the guest was +- huge, leaving the teardown of the address space to the cleanup +- process. 
Since the cleanup process shares the same cgroups as the +- main qemu process, accounting is performed correctly. This only +- works if the cleanup process is not forcefully killed with SIGKILL +- before the main qemu process has terminated completely. ++ This option is deprecated and should no longer be used. The new option ++ ``-run-with async-teardown=on`` is a replacement. + ERST ++DEF("run-with", HAS_ARG, QEMU_OPTION_run_with, ++ "-run-with async-teardown[=on|off]\n" ++ " misc QEMU process lifecycle options\n" ++ " async-teardown=on enables asynchronous teardown\n", ++ QEMU_ARCH_ALL) ++SRST ++``-run-with`` ++ Set QEMU process lifecycle options. ++ ++ ``async-teardown=on`` enables asynchronous teardown. A new process called ++ "cleanup/" will be created at startup sharing the address ++ space with the main QEMU process, using clone. It will wait for the ++ main QEMU process to terminate completely, and then exit. This allows ++ QEMU to terminate very quickly even if the guest was huge, leaving the ++ teardown of the address space to the cleanup process. Since the cleanup ++ process shares the same cgroups as the main QEMU process, accounting is ++ performed correctly. This only works if the cleanup process is not ++ forcefully killed with SIGKILL before the main QEMU process has ++ terminated completely. 
++ERST ++#endif + + DEF("msg", HAS_ARG, QEMU_OPTION_msg, + "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n" +diff --git a/util/async-teardown.c b/util/async-teardown.c +index 62cdeb0f20..3ab19c8740 100644 +--- a/util/async-teardown.c ++++ b/util/async-teardown.c +@@ -12,6 +12,9 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/config-file.h" ++#include "qemu/option.h" ++#include "qemu/module.h" + #include + #include + #include +@@ -144,3 +147,21 @@ void init_async_teardown(void) + clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); + sigprocmask(SIG_SETMASK, &old_signals, NULL); + } ++ ++static QemuOptsList qemu_run_with_opts = { ++ .name = "run-with", ++ .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), ++ .desc = { ++ { ++ .name = "async-teardown", ++ .type = QEMU_OPT_BOOL, ++ }, ++ { /* end of list */ } ++ }, ++}; ++ ++static void register_teardown(void) ++{ ++ qemu_add_opts(&qemu_run_with_opts); ++} ++opts_init(register_teardown); +-- +2.39.3 + diff --git a/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch b/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch new file mode 100644 index 0000000..fe68d18 --- /dev/null +++ b/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch @@ -0,0 +1,97 @@ +From 64652225695c23855cfb1252cea2b55c24da2260 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:15 +0200 +Subject: [PATCH 1/9] util/iov: Make qiov_slice() public + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/5] 9c3cd661f7139ce124ee4f4d5fcbeaf3dbb9c45c (hreitz/qemu-kvm-c-9-s) + +We want to inline qemu_iovec_init_extended() in block/io.c for padding +requests, and having access to qiov_slice() is useful for this. As a +public function, it is renamed to qemu_iovec_slice(). + +(We will need to count the number of I/O vector elements of a slice +there, and then later process this slice. 
Without qiov_slice(), we +would need to call qemu_iovec_subvec_niov(), and all further +IOV-processing functions may need to skip prefixing elements to +accomodate for a qiov_offset. Because qemu_iovec_subvec_niov() +internally calls qiov_slice(), we can just have the block/io.c code call +qiov_slice() itself, thus get the number of elements, and also create an +iovec array with the superfluous prefixing elements stripped, so the +following processing functions no longer need to skip them.) + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-2-hreitz@redhat.com> +(cherry picked from commit 3d06cea8256d54a6b0238934c31012f7f17100f5) +Signed-off-by: Hanna Czenczek +--- + include/qemu/iov.h | 3 +++ + util/iov.c | 14 +++++++------- + 2 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 9330746680..46fadfb27a 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -229,6 +229,9 @@ int qemu_iovec_init_extended( + void *tail_buf, size_t tail_len); + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); ++struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov); + int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); + void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); + void qemu_iovec_concat(QEMUIOVector *dst, +diff --git a/util/iov.c b/util/iov.c +index b4be580022..65a70449da 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -378,15 +378,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, + } + + /* +- * qiov_slice ++ * qemu_iovec_slice + * + * Find subarray of iovec's, containing requested range. @head would + * be offset in first iov (returned by the function), @tail would be + * count of extra bytes in last iovec (returned iov + @niov - 1). 
+ */ +-static struct iovec *qiov_slice(QEMUIOVector *qiov, +- size_t offset, size_t len, +- size_t *head, size_t *tail, int *niov) ++struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov) + { + struct iovec *iov, *end_iov; + +@@ -411,7 +411,7 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) + size_t head, tail; + int niov; + +- qiov_slice(qiov, offset, len, &head, &tail, &niov); ++ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov); + + return niov; + } +@@ -439,8 +439,8 @@ int qemu_iovec_init_extended( + } + + if (mid_len) { +- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, +- &mid_head, &mid_tail, &mid_niov); ++ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, ++ &mid_head, &mid_tail, &mid_niov); + } + + total_niov = !!head_len + mid_niov + !!tail_len; +-- +2.39.3 + diff --git a/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch b/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch new file mode 100644 index 0000000..fd21880 --- /dev/null +++ b/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch @@ -0,0 +1,156 @@ +From 8ff973985a04fec1a3cdf886976a03e0dca7b0ea Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:17 +0200 +Subject: [PATCH 3/9] util/iov: Remove qemu_iovec_init_extended() + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/5] 1740d7b15ea4fbfbe71e7adc122741e85e83fb8c (hreitz/qemu-kvm-c-9-s) + +bdrv_pad_request() was the main user of qemu_iovec_init_extended(). +HEAD^ has removed that use, so we can remove qemu_iovec_init_extended() +now. + +The only remaining user is qemu_iovec_init_slice(), which can easily +inline the small part it really needs. + +Note that qemu_iovec_init_extended() offered a memcpy() optimization to +initialize the new I/O vector. 
qemu_iovec_concat_iov(), which is used +to replace its functionality, does not, but calls qemu_iovec_add() for +every single element. If we decide this optimization was important, we +will need to re-implement it in qemu_iovec_concat_iov(), which might +also benefit its pre-existing users. + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-4-hreitz@redhat.com> +(cherry picked from commit cc63f6f6fa1aaa4b6405dd69432c693e9c8d18ca) +Signed-off-by: Hanna Czenczek +--- + include/qemu/iov.h | 5 --- + util/iov.c | 79 +++++++--------------------------------------- + 2 files changed, 11 insertions(+), 73 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 46fadfb27a..63a1c01965 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -222,11 +222,6 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) + + void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); + void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); +-int qemu_iovec_init_extended( +- QEMUIOVector *qiov, +- void *head_buf, size_t head_len, +- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, +- void *tail_buf, size_t tail_len); + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); + struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, +diff --git a/util/iov.c b/util/iov.c +index 65a70449da..866fb577f3 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -416,70 +416,6 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) + return niov; + } + +-/* +- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, +- * and @tail_buf buffer into new qiov. 
+- */ +-int qemu_iovec_init_extended( +- QEMUIOVector *qiov, +- void *head_buf, size_t head_len, +- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, +- void *tail_buf, size_t tail_len) +-{ +- size_t mid_head, mid_tail; +- int total_niov, mid_niov = 0; +- struct iovec *p, *mid_iov = NULL; +- +- assert(mid_qiov->niov <= IOV_MAX); +- +- if (SIZE_MAX - head_len < mid_len || +- SIZE_MAX - head_len - mid_len < tail_len) +- { +- return -EINVAL; +- } +- +- if (mid_len) { +- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, +- &mid_head, &mid_tail, &mid_niov); +- } +- +- total_niov = !!head_len + mid_niov + !!tail_len; +- if (total_niov > IOV_MAX) { +- return -EINVAL; +- } +- +- if (total_niov == 1) { +- qemu_iovec_init_buf(qiov, NULL, 0); +- p = &qiov->local_iov; +- } else { +- qiov->niov = qiov->nalloc = total_niov; +- qiov->size = head_len + mid_len + tail_len; +- p = qiov->iov = g_new(struct iovec, qiov->niov); +- } +- +- if (head_len) { +- p->iov_base = head_buf; +- p->iov_len = head_len; +- p++; +- } +- +- assert(!mid_niov == !mid_len); +- if (mid_niov) { +- memcpy(p, mid_iov, mid_niov * sizeof(*p)); +- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; +- p[0].iov_len -= mid_head; +- p[mid_niov - 1].iov_len -= mid_tail; +- p += mid_niov; +- } +- +- if (tail_len) { +- p->iov_base = tail_buf; +- p->iov_len = tail_len; +- } +- +- return 0; +-} +- + /* + * Check if the contents of subrange of qiov data is all zeroes. 
+ */ +@@ -511,14 +447,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len) + { +- int ret; ++ struct iovec *slice_iov; ++ int slice_niov; ++ size_t slice_head, slice_tail; + + assert(source->size >= len); + assert(source->size - len >= offset); + +- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */ +- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); +- assert(ret == 0); ++ slice_iov = qemu_iovec_slice(source, offset, len, ++ &slice_head, &slice_tail, &slice_niov); ++ if (slice_niov == 1) { ++ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len); ++ } else { ++ qemu_iovec_init(qiov, slice_niov); ++ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len); ++ } + } + + void qemu_iovec_destroy(QEMUIOVector *qiov) +-- +2.39.3 + diff --git a/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch b/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch new file mode 100644 index 0000000..b0e66f6 --- /dev/null +++ b/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch @@ -0,0 +1,95 @@ +From 439a8cdd010dfd253fc2277ae4ec605b5ba621d9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:36 -0400 +Subject: [PATCH 02/56] util/mmap-alloc: qemu_fd_getfs() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [1/50] 8970b5ae611a933d693e0c90cbf4eda073635494 (peterx/qemu-kvm) + +This new helper fetches file system type for a fd. Only Linux is +implemented so far. Currently only tmpfs and hugetlbfs are defined, +but it can grow as needed. 
+ +Signed-off-by: Peter Xu +Reviewed-by: David Hildenbrand +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit fa45f8dab9613993c042176ea2d25552bfebc955) +Signed-off-by: Peter Xu +--- + include/qemu/mmap-alloc.h | 7 +++++++ + util/mmap-alloc.c | 28 ++++++++++++++++++++++++++++ + 2 files changed, 35 insertions(+) + +diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h +index 2825e231a7..8344daaa03 100644 +--- a/include/qemu/mmap-alloc.h ++++ b/include/qemu/mmap-alloc.h +@@ -1,8 +1,15 @@ + #ifndef QEMU_MMAP_ALLOC_H + #define QEMU_MMAP_ALLOC_H + ++typedef enum { ++ QEMU_FS_TYPE_UNKNOWN = 0, ++ QEMU_FS_TYPE_TMPFS, ++ QEMU_FS_TYPE_HUGETLBFS, ++ QEMU_FS_TYPE_NUM, ++} QemuFsType; + + size_t qemu_fd_getpagesize(int fd); ++QemuFsType qemu_fd_getfs(int fd); + + /** + * qemu_ram_mmap: mmap anonymous memory, the specified file or device. +diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c +index 5ed7d29183..ed14f9c64d 100644 +--- a/util/mmap-alloc.c ++++ b/util/mmap-alloc.c +@@ -27,8 +27,36 @@ + + #ifdef CONFIG_LINUX + #include ++#include + #endif + ++QemuFsType qemu_fd_getfs(int fd) ++{ ++#ifdef CONFIG_LINUX ++ struct statfs fs; ++ int ret; ++ ++ if (fd < 0) { ++ return QEMU_FS_TYPE_UNKNOWN; ++ } ++ ++ do { ++ ret = fstatfs(fd, &fs); ++ } while (ret != 0 && errno == EINTR); ++ ++ switch (fs.f_type) { ++ case TMPFS_MAGIC: ++ return QEMU_FS_TYPE_TMPFS; ++ case HUGETLBFS_MAGIC: ++ return QEMU_FS_TYPE_HUGETLBFS; ++ default: ++ return QEMU_FS_TYPE_UNKNOWN; ++ } ++#else ++ return QEMU_FS_TYPE_UNKNOWN; ++#endif ++} ++ + size_t qemu_fd_getpagesize(int fd) + { + #ifdef CONFIG_LINUX +-- +2.39.1 + diff --git a/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch b/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch deleted file mode 100644 index 5a5f90c..0000000 --- a/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 80445fed73a7d1a87e8ce96f6cb7d505e437f845 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 1 
Feb 2023 16:10:54 -0500 -Subject: [PATCH 4/8] util/userfaultfd: Add uffd_open() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] 4c81696314ab26db47c3415fa2c2501c6a572b5c (peterx/qemu-kvm) - -Add a helper to create the uffd handle. - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Juan Quintela -Signed-off-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit d5890ea0722831eea76a0efd23a496b3e8815fe8) -Signed-off-by: Peter Xu ---- - include/qemu/userfaultfd.h | 12 ++++++++++++ - migration/postcopy-ram.c | 11 +++++------ - tests/qtest/migration-test.c | 4 ++-- - util/userfaultfd.c | 13 +++++++++++-- - 4 files changed, 30 insertions(+), 10 deletions(-) - -diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h -index 6b74f92792..d764496f0b 100644 ---- a/include/qemu/userfaultfd.h -+++ b/include/qemu/userfaultfd.h -@@ -13,10 +13,20 @@ - #ifndef USERFAULTFD_H - #define USERFAULTFD_H - -+#ifdef CONFIG_LINUX -+ - #include "qemu/osdep.h" - #include "exec/hwaddr.h" - #include - -+/** -+ * uffd_open(): Open an userfaultfd handle for current context. -+ * -+ * @flags: The flags we want to pass in when creating the handle. -+ * -+ * Returns: the uffd handle if >=0, or <0 if error happens. 
-+ */ -+int uffd_open(int flags); - int uffd_query_features(uint64_t *features); - int uffd_create_fd(uint64_t features, bool non_blocking); - void uffd_close_fd(int uffd_fd); -@@ -32,4 +42,6 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); - int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); - bool uffd_poll_events(int uffd_fd, int tmo); - -+#endif /* CONFIG_LINUX */ -+ - #endif /* USERFAULTFD_H */ -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index b9a37ef255..0c55df0e52 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -37,6 +37,7 @@ - #include "qemu-file.h" - #include "yank_functions.h" - #include "tls.h" -+#include "qemu/userfaultfd.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -@@ -226,11 +227,9 @@ static bool receive_ufd_features(uint64_t *features) - int ufd; - bool ret = true; - -- /* if we are here __NR_userfaultfd should exists */ -- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { -- error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, -- strerror(errno)); -+ error_report("%s: uffd_open() failed: %s", __func__, strerror(errno)); - return false; - } - -@@ -375,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - goto out; - } - -- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { - error_report("%s: userfaultfd not available: %s", __func__, - strerror(errno)); -@@ -1160,7 +1159,7 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) - int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - { - /* Open the fd for the kernel to give us userfaults */ -- mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); -+ mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK); - if (mis->userfault_fd == -1) { - error_report("%s: Failed to open userfault fd: %s", __func__, - 
strerror(errno)); -diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c -index dbde726adf..0100e1bdbc 100644 ---- a/tests/qtest/migration-test.c -+++ b/tests/qtest/migration-test.c -@@ -61,14 +61,14 @@ static bool uffd_feature_thread_id; - #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) - #include - #include --#include -+#include "qemu/userfaultfd.h" - - static bool ufd_version_check(void) - { - struct uffdio_api api_struct; - uint64_t ioctl_mask; - -- int ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ int ufd = uffd_open(O_CLOEXEC); - - if (ufd == -1) { - g_test_message("Skipping test: userfaultfd not available"); -diff --git a/util/userfaultfd.c b/util/userfaultfd.c -index f1cd6af2b1..4953b3137d 100644 ---- a/util/userfaultfd.c -+++ b/util/userfaultfd.c -@@ -19,6 +19,15 @@ - #include - #include - -+int uffd_open(int flags) -+{ -+#if defined(__NR_userfaultfd) -+ return syscall(__NR_userfaultfd, flags); -+#else -+ return -EINVAL; -+#endif -+} -+ - /** - * uffd_query_features: query UFFD features - * -@@ -32,7 +41,7 @@ int uffd_query_features(uint64_t *features) - struct uffdio_api api_struct = { 0 }; - int ret = -1; - -- uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ uffd_fd = uffd_open(O_CLOEXEC); - if (uffd_fd < 0) { - trace_uffd_query_features_nosys(errno); - return -1; -@@ -69,7 +78,7 @@ int uffd_create_fd(uint64_t features, bool non_blocking) - uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); - - flags = O_CLOEXEC | (non_blocking ? 
O_NONBLOCK : 0); -- uffd_fd = syscall(__NR_userfaultfd, flags); -+ uffd_fd = uffd_open(flags); - if (uffd_fd < 0) { - trace_uffd_create_fd_nosys(errno); - return -1; --- -2.31.1 - diff --git a/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch b/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch deleted file mode 100644 index b0a22eb..0000000 --- a/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch +++ /dev/null @@ -1,94 +0,0 @@ -From a91da7741464dadeb306a741b4fb562e49ffea57 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 7 Feb 2023 15:57:11 -0500 -Subject: [PATCH 5/8] util/userfaultfd: Support /dev/userfaultfd - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 5f427d8c18c210ff8f66724c9e358a7120619e69 (peterx/qemu-kvm) - -Teach QEMU to use /dev/userfaultfd when it existed and fallback to the -system call if either it's not there or doesn't have enough permission. - -Firstly, as long as the app has permission to access /dev/userfaultfd, it -always have the ability to trap kernel faults which QEMU mostly wants. -Meanwhile, in some context (e.g. containers) the userfaultfd syscall can be -forbidden, so it can be the major way to use postcopy in a restricted -environment with strict seccomp setup. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit c40c0463413b941c13fe5f99a90c02d7d6584828) -Signed-off-by: Peter Xu ---- - util/trace-events | 1 + - util/userfaultfd.c | 32 ++++++++++++++++++++++++++++++++ - 2 files changed, 33 insertions(+) - -diff --git a/util/trace-events b/util/trace-events -index c8f53d7d9f..16f78d8fe5 100644 ---- a/util/trace-events -+++ b/util/trace-events -@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz - qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p" - - #userfaultfd.c -+uffd_detect_open_mode(int mode) "%d" - uffd_query_features_nosys(int err) "errno: %i" - uffd_query_features_api_failed(int err) "errno: %i" - uffd_create_fd_nosys(int err) "errno: %i" -diff --git a/util/userfaultfd.c b/util/userfaultfd.c -index 4953b3137d..fdff4867e8 100644 ---- a/util/userfaultfd.c -+++ b/util/userfaultfd.c -@@ -18,10 +18,42 @@ - #include - #include - #include -+#include -+ -+typedef enum { -+ UFFD_UNINITIALIZED = 0, -+ UFFD_USE_DEV_PATH, -+ UFFD_USE_SYSCALL, -+} uffd_open_mode; - - int uffd_open(int flags) - { - #if defined(__NR_userfaultfd) -+ static uffd_open_mode open_mode; -+ static int uffd_dev; -+ -+ /* Detect how to generate uffd desc when run the 1st time */ -+ if (open_mode == UFFD_UNINITIALIZED) { -+ /* -+ * Make /dev/userfaultfd the default approach because it has better -+ * permission controls, meanwhile allows kernel faults without any -+ * privilege requirement (e.g. SYS_CAP_PTRACE). 
-+ */ -+ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); -+ if (uffd_dev >= 0) { -+ open_mode = UFFD_USE_DEV_PATH; -+ } else { -+ /* Fallback to the system call */ -+ open_mode = UFFD_USE_SYSCALL; -+ } -+ trace_uffd_detect_open_mode(open_mode); -+ } -+ -+ if (open_mode == UFFD_USE_DEV_PATH) { -+ assert(uffd_dev >= 0); -+ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags); -+ } -+ - return syscall(__NR_userfaultfd, flags); - #else - return -EINVAL; --- -2.31.1 - diff --git a/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch b/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch new file mode 100644 index 0000000..4e492d9 --- /dev/null +++ b/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch @@ -0,0 +1,82 @@ +From fb2d40cc84f689e46138a81c57ccd1f234dbbb7c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 07/37] util/vfio-helpers: Use g_file_read_link() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/28] 3545a07c967782dba8dd081415232f91d3f600a9 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit dbdea0dbfe2c +Author: Akihiko Odaki +Date: Tue May 23 11:39:12 2023 +0900 + + util/vfio-helpers: Use g_file_read_link() + + When _FORTIFY_SOURCE=2, glibc version is 2.35, and GCC version is + 12.1.0, the compiler complains as follows: + + In file included from /usr/include/features.h:490, + from /usr/include/bits/libc-header-start.h:33, + from /usr/include/stdint.h:26, + from /usr/lib/gcc/aarch64-unknown-linux-gnu/12.1.0/include/stdint.h:9, + from /home/alarm/q/var/qemu/include/qemu/osdep.h:94, + from ../util/vfio-helpers.c:13: + In function 'readlink', + inlined from 'sysfs_find_group_file' at ../util/vfio-helpers.c:116:9, + 
inlined from 'qemu_vfio_init_pci' at ../util/vfio-helpers.c:326:18, + inlined from 'qemu_vfio_open_pci' at ../util/vfio-helpers.c:517:9: + /usr/include/bits/unistd.h:119:10: error: argument 2 is null but the corresponding size argument 3 value is 4095 [-Werror=nonnull] + 119 | return __glibc_fortify (readlink, __len, sizeof (char), + | ^~~~~~~~~~~~~~~ + + This error implies the allocated buffer can be NULL. Use + g_file_read_link(), which allocates buffer automatically to avoid the + error. + + Signed-off-by: Akihiko Odaki + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + util/vfio-helpers.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c +index 2d8af38f88..f8bab46c68 100644 +--- a/util/vfio-helpers.c ++++ b/util/vfio-helpers.c +@@ -106,15 +106,17 @@ struct QEMUVFIOState { + */ + static char *sysfs_find_group_file(const char *device, Error **errp) + { ++ g_autoptr(GError) gerr = NULL; + char *sysfs_link; + char *sysfs_group; + char *p; + char *path = NULL; + + sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device); +- sysfs_group = g_malloc0(PATH_MAX); +- if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) { +- error_setg_errno(errp, errno, "Failed to find iommu group sysfs path"); ++ sysfs_group = g_file_read_link(sysfs_link, &gerr); ++ if (gerr) { ++ error_setg(errp, "Failed to find iommu group sysfs path: %s", ++ gerr->message); + goto out; + } + p = strrchr(sysfs_group, '/'); +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch b/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch deleted file mode 100644 index a56c6eb..0000000 --- a/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch +++ /dev/null @@ -1,221 +0,0 @@ -From d0e7f24a8d941ab142f2a1973ae18ed1bfdc074f Mon Sep 17 00:00:00 2001 
-From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:41 +0100 -Subject: [PATCH 09/14] vdpa: add asid parameter to vhost_vdpa_dma_map/unmap -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/13] 3e7f89e57f73661017ccf0206f2ea77a72ca46bb (eperezmartin/qemu-kvm) - -So the caller can choose which ASID is destined. - -No need to update the batch functions as they will always be called from -memory listener updates at the moment. Memory listener updates will -always update ASID 0, as it's the passthrough ASID. - -All vhost devices's ASID are 0 at this moment. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-10-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit cd831ed5c4add8ed6ee980c3645b241cbef5130f) ---- - hw/virtio/trace-events | 4 ++-- - hw/virtio/vhost-vdpa.c | 36 +++++++++++++++++++++++----------- - include/hw/virtio/vhost-vdpa.h | 14 ++++++++++--- - net/vhost-vdpa.c | 6 +++--- - 4 files changed, 41 insertions(+), 19 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 46f2faf04e..a87c5f39a2 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -30,8 +30,8 @@ vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" - vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" - - # vhost-vdpa.c --vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 --vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint8_t 
type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 -+vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 -+vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 - vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index dd2768634b..0ecf2bbaa0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -72,22 +72,28 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, - return false; - } - --int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -- void *vaddr, bool readonly) -+/* -+ * The caller must set asid = 0 if the device does not support asid. -+ * This is not an ABI break since it is set to 0 by the initializer anyway. -+ */ -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size, void *vaddr, bool readonly) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; - int ret = 0; - - msg.type = v->msg_type; -+ msg.asid = asid; - msg.iotlb.iova = iova; - msg.iotlb.size = size; - msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; - msg.iotlb.perm = readonly ? 
VHOST_ACCESS_RO : VHOST_ACCESS_RW; - msg.iotlb.type = VHOST_IOTLB_UPDATE; - -- trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size, -- msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); -+ trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, -+ msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, -+ msg.iotlb.type); - - if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { - error_report("failed to write, fd=%d, errno=%d (%s)", -@@ -98,18 +104,24 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, - return ret; - } - --int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) -+/* -+ * The caller must set asid = 0 if the device does not support asid. -+ * This is not an ABI break since it is set to 0 by the initializer anyway. -+ */ -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; - int ret = 0; - - msg.type = v->msg_type; -+ msg.asid = asid; - msg.iotlb.iova = iova; - msg.iotlb.size = size; - msg.iotlb.type = VHOST_IOTLB_INVALIDATE; - -- trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova, -+ trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, - msg.iotlb.size, msg.iotlb.type); - - if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { -@@ -229,8 +241,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - } - - vhost_vdpa_iotlb_batch_begin_once(v); -- ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), -- vaddr, section->readonly); -+ ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, -+ int128_get64(llsize), vaddr, section->readonly); - if (ret) { - error_report("vhost vdpa map fail!"); - goto fail_map; -@@ -303,7 +315,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - vhost_iova_tree_remove(v->iova_tree, *result); - } - vhost_vdpa_iotlb_batch_begin_once(v); -- ret = vhost_vdpa_dma_unmap(v, iova, 
int128_get64(llsize)); -+ ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, -+ int128_get64(llsize)); - if (ret) { - error_report("vhost_vdpa dma unmap error!"); - } -@@ -876,7 +889,7 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) - } - - size = ROUND_UP(result->size, qemu_real_host_page_size()); -- r = vhost_vdpa_dma_unmap(v, result->iova, size); -+ r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size); - if (unlikely(r < 0)) { - error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); - return; -@@ -916,7 +929,8 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, - return false; - } - -- r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1, -+ r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova, -+ needle->size + 1, - (void *)(uintptr_t)needle->translated_addr, - needle->perm == IOMMU_RO); - if (unlikely(r != 0)) { -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index 1111d85643..e57dfa1fd1 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -19,6 +19,12 @@ - #include "hw/virtio/virtio.h" - #include "standard-headers/linux/vhost_types.h" - -+/* -+ * ASID dedicated to map guest's addresses. If SVQ is disabled it maps GPA to -+ * qemu's IOVA. 
If SVQ is enabled it maps also the SVQ vring here -+ */ -+#define VHOST_VDPA_GUEST_PA_ASID 0 -+ - typedef struct VhostVDPAHostNotifier { - MemoryRegion mr; - void *addr; -@@ -29,6 +35,7 @@ typedef struct vhost_vdpa { - int index; - uint32_t msg_type; - bool iotlb_batch_begin_sent; -+ uint32_t address_space_id; - MemoryListener listener; - struct vhost_vdpa_iova_range iova_range; - uint64_t acked_features; -@@ -42,8 +49,9 @@ typedef struct vhost_vdpa { - VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - } VhostVDPA; - --int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -- void *vaddr, bool readonly); --int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size, void *vaddr, bool readonly); -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size); - - #endif -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 85aa0da39a..c2f319eb88 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -258,7 +258,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - return; - } - -- r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); -+ r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1); - if (unlikely(r != 0)) { - error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); - } -@@ -298,8 +298,8 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, - return r; - } - -- r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, -- !write); -+ r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova, -+ vhost_vdpa_net_cvq_cmd_page_len(), buf, !write); - if (unlikely(r < 0)) { - goto dma_map_err; - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch b/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch deleted file mode 100644 index 57c38d1..0000000 --- 
a/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 6282a83619f274ca45a52d61577c10a05a0714dc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:43 +0100 -Subject: [PATCH 11/14] vdpa: add shadow_data to vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/13] 9d317add1318b555ba06e19e4c67849069e047b9 (eperezmartin/qemu-kvm) - -The memory listener that thells the device how to convert GPA to qemu's -va is registered against CVQ vhost_vdpa. memory listener translations -are always ASID 0, CVQ ones are ASID 1 if supported. - -Let's tell the listener if it needs to register them on iova tree or -not. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-12-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 6188d78a19894ac8f2bf9484d48a5235a529d3b7) ---- - hw/virtio/vhost-vdpa.c | 6 +++--- - include/hw/virtio/vhost-vdpa.h | 2 ++ - net/vhost-vdpa.c | 1 + - 3 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 0ecf2bbaa0..dc3498e995 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -224,7 +224,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - vaddr, section->readonly); - - llsize = int128_sub(llend, int128_make64(iova)); -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - int r; - - mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, -@@ -251,7 +251,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - return; - - fail_map: -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - vhost_iova_tree_remove(v->iova_tree, mem_region); - } - -@@ -296,7 +296,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - - llsize = int128_sub(llend, int128_make64(iova)); - -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - const DMAMap *result; - const void *vaddr = memory_region_get_ram_ptr(section->mr) + - section->offset_within_region + -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index e57dfa1fd1..45b969a311 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -40,6 +40,8 @@ typedef struct vhost_vdpa { - struct vhost_vdpa_iova_range iova_range; - uint64_t acked_features; - bool shadow_vqs_enabled; -+ /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ -+ bool shadow_data; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; - GPtrArray *shadow_vqs; -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1757f1d028..eea7a0df12 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -581,6 +581,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer, - s->always_svq = svq; - s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; -+ s->vhost_vdpa.shadow_data = svq; - s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch b/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch deleted file mode 100644 index c54a831..0000000 --- a/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 0f3a28e1e128754184c4af6a578f27e16c6a61d5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:37 +0100 -Subject: [PATCH 05/14] vdpa: add vhost_vdpa_net_valid_svq_features -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/13] 0b27e04f178ec73cb800f4fb05c17a92576142e4 (eperezmartin/qemu-kvm) - -It will be reused at vdpa device start so let's extract in its own -function. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-6-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 36e4647247f200b6fa4d2f656133f567036e8a85) ---- - net/vhost-vdpa.c | 26 +++++++++++++++++--------- - 1 file changed, 17 insertions(+), 9 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index b06540ac89..16a5ebe2dd 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -106,6 +106,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - return s->vhost_net; - } - -+static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) -+{ -+ uint64_t invalid_dev_features = -+ features & ~vdpa_svq_device_features & -+ /* Transport are all accepted at this point */ -+ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, -+ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); -+ -+ if (invalid_dev_features) { -+ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, -+ invalid_dev_features); -+ } -+ -+ return !invalid_dev_features; -+} -+ - static int vhost_vdpa_net_check_device_id(struct vhost_net *net) - { - uint32_t device_id; -@@ -684,15 +700,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (opts->x_svq) { - struct vhost_vdpa_iova_range iova_range; - -- uint64_t invalid_dev_features = -- features & ~vdpa_svq_device_features & -- /* Transport are all accepted at this point */ -- ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, -- VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); -- -- if (invalid_dev_features) { -- error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, -- invalid_dev_features); -+ if (!vhost_vdpa_net_valid_svq_features(features, errp)) { - goto err_svq; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch b/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch deleted file mode 100644 index 22c5955..0000000 --- a/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 72f296870805750df8dfe5eaad77dd7d435a8f41 Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:40 +0100 -Subject: [PATCH 08/14] vdpa: allocate SVQ array unconditionally -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/13] 08cd86d0859f82d768794e29241cfeff25df667c (eperezmartin/qemu-kvm) - -SVQ may run or not in a device depending on runtime conditions (for -example, if the device can move CVQ to its own group or not). - -Allocate the SVQ array unconditionally at startup, since its hard to -move this allocation elsewhere. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-9-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 273e0003f0005cc17292dedae01e5edb0064b69c) ---- - hw/virtio/vhost-vdpa.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 84218ce078..dd2768634b 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -532,10 +532,6 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev) - struct vhost_vdpa *v = dev->opaque; - size_t idx; - -- if (!v->shadow_vqs) { -- return; -- } -- - for (idx = 0; idx < v->shadow_vqs->len; ++idx) { - vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx)); - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch b/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch deleted file mode 100644 index 9b78b5c..0000000 --- a/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch +++ /dev/null @@ -1,193 +0,0 @@ -From 84c203faa570b85eec006215768c83371c9f0399 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:44 +0100 -Subject: [PATCH 12/14] vdpa: 
always start CVQ in SVQ mode if possible -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/13] 83f94b3e163ca38d08dbf7c111a4cfa7a44e3dc2 (eperezmartin/qemu-kvm) - -Isolate control virtqueue in its own group, allowing to intercept control -commands but letting dataplane run totally passthrough to the guest. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221215113144.322011-13-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang -(cherry picked from commit c1a1008685af0327d9d03f03d43bdb77e7af5bea) ---- - hw/virtio/vhost-vdpa.c | 3 +- - net/vhost-vdpa.c | 110 ++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 111 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index dc3498e995..72ff06673c 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -638,7 +638,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) - { - uint64_t features; - uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | -- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; -+ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | -+ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID; - int r; - - if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index eea7a0df12..07d33dae26 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -101,6 +101,8 @@ static const uint64_t vdpa_svq_device_features = - BIT_ULL(VIRTIO_NET_F_RSC_EXT) | - BIT_ULL(VIRTIO_NET_F_STANDBY); - -+#define VHOST_VDPA_NET_CVQ_ASID 1 -+ - VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -242,6 +244,40 @@ static NetClientInfo net_vhost_vdpa_info = { - 
.check_peer_type = vhost_vdpa_check_peer_type, - }; - -+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) -+{ -+ struct vhost_vring_state state = { -+ .index = vq_index, -+ }; -+ int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); -+ -+ if (unlikely(r < 0)) { -+ error_report("Cannot get VQ %u group: %s", vq_index, -+ g_strerror(errno)); -+ return r; -+ } -+ -+ return state.num; -+} -+ -+static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, -+ unsigned vq_group, -+ unsigned asid_num) -+{ -+ struct vhost_vring_state asid = { -+ .index = vq_group, -+ .num = asid_num, -+ }; -+ int r; -+ -+ r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); -+ if (unlikely(r < 0)) { -+ error_report("Can't set vq group %u asid %u, errno=%d (%s)", -+ asid.index, asid.num, errno, g_strerror(errno)); -+ } -+ return r; -+} -+ - static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - { - VhostIOVATree *tree = v->iova_tree; -@@ -316,11 +352,75 @@ dma_map_err: - static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { - VhostVDPAState *s; -- int r; -+ struct vhost_vdpa *v; -+ uint64_t backend_features; -+ int64_t cvq_group; -+ int cvq_index, r; - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - - s = DO_UPCAST(VhostVDPAState, nc, nc); -+ v = &s->vhost_vdpa; -+ -+ v->shadow_data = s->always_svq; -+ v->shadow_vqs_enabled = s->always_svq; -+ s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; -+ -+ if (s->always_svq) { -+ /* SVQ is already configured for all virtqueues */ -+ goto out; -+ } -+ -+ /* -+ * If we early return in these cases SVQ will not be enabled. The migration -+ * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. -+ * -+ * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev -+ * yet. 
-+ */ -+ r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -+ if (unlikely(r < 0)) { -+ error_report("Cannot get vdpa backend_features: %s(%d)", -+ g_strerror(errno), errno); -+ return -1; -+ } -+ if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || -+ !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ return 0; -+ } -+ -+ /* -+ * Check if all the virtqueues of the virtio device are in a different vq -+ * than the last vq. VQ group of last group passed in cvq_group. -+ */ -+ cvq_index = v->dev->vq_index_end - 1; -+ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); -+ if (unlikely(cvq_group < 0)) { -+ return cvq_group; -+ } -+ for (int i = 0; i < cvq_index; ++i) { -+ int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); -+ -+ if (unlikely(group < 0)) { -+ return group; -+ } -+ -+ if (group == cvq_group) { -+ return 0; -+ } -+ } -+ -+ r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ -+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -+ v->iova_range.last); -+ v->shadow_vqs_enabled = true; -+ s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; -+ -+out: - if (!s->vhost_vdpa.shadow_vqs_enabled) { - return 0; - } -@@ -349,6 +449,14 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - if (s->vhost_vdpa.shadow_vqs_enabled) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); -+ if (!s->always_svq) { -+ /* -+ * If only the CVQ is shadowed we can delete this safely. -+ * If all the VQs are shadows this will be needed by the time the -+ * device is started again to register SVQ vrings and similar. 
-+ */ -+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -+ } - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch b/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch new file mode 100644 index 0000000..56b9aed --- /dev/null +++ b/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch @@ -0,0 +1,61 @@ +From 74c2f378bdf278a03c02ae48948b00b4431a3fd6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 2 Jun 2023 16:38:52 +0200 +Subject: [PATCH 6/9] vdpa: do not block migration if device has cvq and + x-svq=on +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 190: vdpa: do not block migration if device has cvq and x-svq=on +RH-Jira: RHEL-573 +RH-Acked-by: Jason Wang +RH-Acked-by: Laurent Vivier +RH-Commit: [1/1] b0e2ec3c9e5c17252cf6a043fe1374ddc3c37de7 (eperezmartin/qemu-kvm) + +It was a mistake to forbid in all cases, as SVQ is already able to send +all the CVQ messages before start forwarding data vqs. It actually +caused a regression, making impossible to migrate device previously +migratable. + +Fixes: 36e4647247f2 ("vdpa: add vhost_vdpa_net_valid_svq_features") +Signed-off-by: Eugenio Pérez +Message-Id: <20230602143854.1879091-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Tested-by: Lei Yang +(cherry picked from commit 8bc0049eadafb984d305c847cedff550b58e5fc0) +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 8c8900f0f4..1ae839da34 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -844,13 +844,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.shadow_vq_ops_opaque = s; + + /* +- * TODO: We cannot migrate devices with CVQ as there is no way to set +- * the device state (MAC, MQ, etc) before starting the datapath. ++ * TODO: We cannot migrate devices with CVQ and no x-svq enabled as ++ * there is no way to set the device state (MAC, MQ, etc) before ++ * starting the datapath. + * + * Migration blocker ownership now belongs to s->vhost_vdpa. + */ +- error_setg(&s->vhost_vdpa.migration_blocker, +- "net vdpa cannot migrate with CVQ feature"); ++ if (!svq) { ++ error_setg(&s->vhost_vdpa.migration_blocker, ++ "net vdpa cannot migrate with CVQ feature"); ++ } + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch b/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch deleted file mode 100644 index d800258..0000000 --- a/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch +++ /dev/null @@ -1,44 +0,0 @@ -From fbb177ad84d562a20e51e71c73257d2ef85be2d9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:15 +0100 -Subject: [PATCH 4/9] vdpa: do not handle VIRTIO_NET_F_GUEST_ANNOUNCE in - vhost-vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu 
-RH-Acked-by: Jason Wang -RH-Commit: [4/4] b3960a8b3e4ca569b1b1e6ceccf2051d8c4b1079 (eperezmartin/qemu-kvm) - -So qemu emulates it even in case the device does not support it. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221221115015.1400889-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 980003debddd18306ea2e1364b96598383c0e257) ---- - net/vhost-vdpa.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 52ef9cb3a2..b06540ac89 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -72,7 +72,6 @@ const int vdpa_feature_bits[] = { - VIRTIO_F_RING_RESET, - VIRTIO_NET_F_RSS, - VIRTIO_NET_F_HASH_REPORT, -- VIRTIO_NET_F_GUEST_ANNOUNCE, - VIRTIO_NET_F_STATUS, - VHOST_INVALID_FEATURE_BIT - }; --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch b/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch new file mode 100644 index 0000000..1ab8f02 --- /dev/null +++ b/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch @@ -0,0 +1,105 @@ +From 636eb63cbf23b31fc9880528490ac4bef680305b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 25 Jan 2023 08:47:34 +0100 +Subject: [PATCH 4/7] vdpa: export vhost_vdpa_set_vring_ready +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [4/7] 8d1fecec7a993b8b68e268e8783c200c158f5ee0 (eperezmartin/qemu-kvm) + +The vhost-vdpa net backend needs to enable vrings in a different order +than default, so export it. + +No functional change intended except for tracing, that now includes the +(virtio) index being enabled and the return value of the ioctl. 
+ +Still ignoring return value of this function if called from +vhost_vdpa_dev_start, as reorganize calling code around it is out of +the scope of this series. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +--- + hw/virtio/trace-events | 2 +- + hw/virtio/vhost-vdpa.c | 25 +++++++++++++------------ + include/hw/virtio/vhost-vdpa.h | 1 + + 3 files changed, 15 insertions(+), 13 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 300dec8d3e..85b43cd8fe 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -48,7 +48,7 @@ vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI + vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 + vhost_vdpa_reset_device(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 + vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" +-vhost_vdpa_set_vring_ready(void *dev) "dev: %p" ++vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d" + vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" + vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 + vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index c04f14420d..e4d0101327 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -733,18 +733,17 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) + return idx; + } + +-static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) ++int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) + { +- int i; +- trace_vhost_vdpa_set_vring_ready(dev); +- for (i = 0; i < dev->nvqs; ++i) { +- struct vhost_vring_state state = { +- .index = dev->vq_index + i, +- .num = 1, +- }; +- vhost_vdpa_call(dev, 
VHOST_VDPA_SET_VRING_ENABLE, &state); +- } +- return 0; ++ struct vhost_dev *dev = v->dev; ++ struct vhost_vring_state state = { ++ .index = idx, ++ .num = 1, ++ }; ++ int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); ++ ++ trace_vhost_vdpa_set_vring_ready(dev, idx, r); ++ return r; + } + + static int vhost_vdpa_set_config_call(struct vhost_dev *dev, +@@ -1155,7 +1154,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + if (unlikely(!ok)) { + return -1; + } +- vhost_vdpa_set_vring_ready(dev); ++ for (int i = 0; i < dev->nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(v, dev->vq_index + i); ++ } + } else { + vhost_vdpa_suspend(dev); + vhost_vdpa_svqs_stop(dev); +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index c278a2a8de..540642d304 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -55,6 +55,7 @@ typedef struct vhost_vdpa { + } VhostVDPA; + + int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); ++int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); + + int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, + hwaddr size, void *vaddr, bool readonly); +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch b/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch deleted file mode 100644 index bb55256..0000000 --- a/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 46e80a9350a02fdb5689638df96bc7389e953cf8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 17 Jan 2023 11:53:08 +0100 -Subject: [PATCH 13/14] vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: 
Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/13] b7fb4b8e9ea26b6664a9179ed0a88376acf5115f (eperezmartin/qemu-kvm) - -VHOST_BACKEND_F_IOTLB_ASID is the feature bit, not the bitmask. Since -the device under test also provided VHOST_BACKEND_F_IOTLB_MSG_V2 and -VHOST_BACKEND_F_IOTLB_BATCH, this went unnoticed. - -Fixes: c1a1008685 ("vdpa: always start CVQ in SVQ mode if possible") -Signed-off-by: Eugenio Pérez -Reviewed-by: Michael S. Tsirkin -Acked-by: Jason Wang -Signed-off-by: Jason Wang - -Upstream status: git@github.com:jasowang/qemu.git -(cherry picked from commit 2bd492bca521ee8594f1d5db8dc9aac126fc4f85) ---- - net/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 07d33dae26..7d9c4ea09d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -384,7 +384,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - g_strerror(errno), errno); - return -1; - } -- if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || -+ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { - return 0; - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch b/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch deleted file mode 100644 index ebb7f38..0000000 --- a/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch +++ /dev/null @@ -1,59 +0,0 @@ -From b71724e94c94acd6e09fed2b47be2901799c2353 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:14 +0100 -Subject: [PATCH 3/9] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in - vhost_vdpa_net_handle_ctrl_avail -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy 
Lu -RH-Acked-by: Jason Wang -RH-Commit: [3/4] c4ef5b62a5d41911565b8960a88bb48d746ff6c7 (eperezmartin/qemu-kvm) - -Since this capability is emulated by qemu shadowed CVQ cannot forward it -to the device. Process all that command within qemu. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221221115015.1400889-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang -(cherry picked from commit 3f9a3eeb7ca6acd899e2205a9118928b4cd94e47) ---- - net/vhost-vdpa.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 2b4b85d8f8..52ef9cb3a2 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -489,9 +489,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, - s->cvq_cmd_out_buffer, - vhost_vdpa_net_cvq_cmd_len()); -- dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -- if (unlikely(dev_written < 0)) { -- goto out; -+ if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) { -+ /* -+ * Guest announce capability is emulated by qemu, so don't forward to -+ * the device. 
-+ */ -+ dev_written = sizeof(status); -+ *s->status = VIRTIO_NET_OK; -+ } else { -+ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -+ if (unlikely(dev_written < 0)) { -+ goto out; -+ } - } - - if (unlikely(dev_written < sizeof(status))) { --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch b/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch deleted file mode 100644 index c577758..0000000 --- a/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 965f27235276e3b16ebf630436eb1d7e792a3d2a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 2 Jun 2023 16:38:54 +0200 -Subject: [PATCH 3/4] vdpa: map shadow vrings with MAP_SHARED -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC -RH-Jira: RHEL-1060 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 673ba501d6e76bae9272847acebaf5f01689f9cf - -JIRA: https://issues.redhat.com/browse/RHEL-1060 - -The vdpa devices that use va addresses neeeds these maps shared. -Otherwise, vhost_vdpa checks will refuse to accept the maps. - -The mmap call will always return a page aligned address, so removing the -qemu_memalign call. Keeping the ROUND_UP for the size as we still need -to DMA-map them in full. - -Not applying fixes tag as it never worked with va devices. - -Signed-off-by: Eugenio Pérez -Message-Id: <20230602143854.1879091-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit babf8b87127ae809b31b3c0a117dcbc91aaf9aba) - -Conflicts - - because of missing commits: - - 5d410557dea4 ("vhost: fix possible wrap in SVQ descriptor ring") - 5c1ebd4c432e ("vdpa: block migration if device has unsupported features") - - and already backported commit$ - - a0d7215e339b ("vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present") - -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-shadow-virtqueue.c | 18 +++++++++--------- - net/vhost-vdpa.c | 16 ++++++++-------- - 2 files changed, 17 insertions(+), 17 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 4307296358..9f09d435be 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -647,7 +647,7 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) - void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - VirtQueue *vq, VhostIOVATree *iova_tree) - { -- size_t desc_size, driver_size, device_size; -+ size_t desc_size; - - event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->next_guest_avail_elem = NULL; -@@ -659,14 +659,14 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - svq->iova_tree = iova_tree; - - svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); -- driver_size = vhost_svq_driver_area_size(svq); -- device_size = vhost_svq_device_area_size(svq); -- svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size); -+ svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), -+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -+ -1, 0); - desc_size = sizeof(vring_desc_t) * svq->vring.num; - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); -- memset(svq->vring.desc, 0, driver_size); -- svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); -- memset(svq->vring.used, 0, device_size); -+ 
svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), -+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -+ -1, 0); - svq->desc_state = g_new0(SVQDescState, svq->vring.num); - svq->desc_next = g_new0(uint16_t, svq->vring.num); - for (unsigned i = 0; i < svq->vring.num - 1; i++) { -@@ -705,8 +705,8 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - svq->vq = NULL; - g_free(svq->desc_next); - g_free(svq->desc_state); -- qemu_vfree(svq->vring.desc); -- qemu_vfree(svq->vring.used); -+ munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); -+ munmap(svq->vring.used, vhost_svq_device_area_size(svq)); - event_notifier_set_handler(&svq->hdev_call, NULL); - } - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index d282c90a3d..8bfa95b801 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -203,8 +203,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { - return; - } -- qemu_vfree(s->cvq_cmd_out_buffer); -- qemu_vfree(s->status); -+ munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len()); -+ munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len()); - if (s->vhost_net) { - vhost_net_cleanup(s->vhost_net); - g_free(s->vhost_net); -@@ -761,12 +761,12 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.shadow_data = svq; - if (!is_datapath) { -- s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), -- vhost_vdpa_net_cvq_cmd_page_len()); -- memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); -- s->status = qemu_memalign(qemu_real_host_page_size(), -- vhost_vdpa_net_cvq_cmd_page_len()); -- memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len()); -+ s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), -+ PROT_READ | PROT_WRITE, -+ MAP_SHARED | MAP_ANONYMOUS, -1, 0); -+ s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), -+ PROT_READ | PROT_WRITE, MAP_SHARED | 
MAP_ANONYMOUS, -+ -1, 0); - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch b/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch new file mode 100644 index 0000000..a37612c --- /dev/null +++ b/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch @@ -0,0 +1,286 @@ +From 1609e47511c9a02b26e0023ff6e1e999d7cdf179 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 26 May 2023 17:31:43 +0200 +Subject: [PATCH 2/7] vdpa: move CVQ isolation check to net_init_vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [2/7] caed8f81c3e30e6147817e7f43225aa3ee90ff37 (eperezmartin/qemu-kvm) + +Evaluating it at start time instead of initialization time may make the +guest capable of dynamically adding or removing migration blockers. + +Also, moving to initialization reduces the number of ioctls in the +migration, reducing failure possibilities. + +As a drawback we need to check for CVQ isolation twice: one time with no +MQ negotiated and another one acking it, as long as the device supports +it. This is because Vring ASID / group management is based on vq +indexes, but we don't know the index of CVQ before negotiating MQ. + +Signed-off-by: Eugenio Pérez +Message-Id: <20230526153143.470745-3-eperezma@redhat.com> +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +--- + net/vhost-vdpa.c | 155 ++++++++++++++++++++++++++++++++++------------- + 1 file changed, 112 insertions(+), 43 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 801d4e0422..ce17e4416a 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -43,6 +43,10 @@ typedef struct VhostVDPAState { + + /* The device always have SVQ enabled */ + bool always_svq; ++ ++ /* The device can isolate CVQ in its own ASID */ ++ bool cvq_isolated; ++ + bool started; + } VhostVDPAState; + +@@ -369,15 +373,8 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + +-/** +- * Get vring virtqueue group +- * +- * @device_fd vdpa device fd +- * @vq_index Virtqueue index +- * +- * Return -errno in case of error, or vq group if success. +- */ +-static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) ++static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, ++ Error **errp) + { + struct vhost_vring_state state = { + .index = vq_index, +@@ -386,8 +383,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) + + if (unlikely(r < 0)) { + r = -errno; +- error_report("Cannot get VQ %u group: %s", vq_index, +- g_strerror(errno)); ++ error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); + return r; + } + +@@ -487,9 +483,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + { + VhostVDPAState *s, *s0; + struct vhost_vdpa *v; +- uint64_t backend_features; + int64_t cvq_group; +- int cvq_index, r; ++ int r; ++ Error *err = NULL; + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + +@@ -509,41 +505,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + /* + * If we early return in these cases SVQ will not be enabled. The migration + * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. +- * +- * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev +- * yet. 
+ */ +- r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); +- if (unlikely(r < 0)) { +- error_report("Cannot get vdpa backend_features: %s(%d)", +- g_strerror(errno), errno); +- return -1; ++ if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { ++ return 0; + } +- if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || +- !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { ++ ++ if (!s->cvq_isolated) { + return 0; + } + +- /* +- * Check if all the virtqueues of the virtio device are in a different vq +- * than the last vq. VQ group of last group passed in cvq_group. +- */ +- cvq_index = v->dev->vq_index_end - 1; +- cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); ++ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, ++ v->dev->vq_index_end - 1, ++ &err); + if (unlikely(cvq_group < 0)) { ++ error_report_err(err); + return cvq_group; + } +- for (int i = 0; i < cvq_index; ++i) { +- int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); +- +- if (unlikely(group < 0)) { +- return group; +- } +- +- if (group == cvq_group) { +- return 0; +- } +- } + + r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); + if (unlikely(r < 0)) { +@@ -806,6 +783,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { + .avail_handler = vhost_vdpa_net_handle_ctrl_avail, + }; + ++/** ++ * Probe if CVQ is isolated ++ * ++ * @device_fd The vdpa device fd ++ * @features Features offered by the device. ++ * @cvq_index The control vq pair index ++ * ++ * Returns <0 in case of failure, 0 if false and 1 if true. 
++ */ ++static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, ++ int cvq_index, Error **errp) ++{ ++ uint64_t backend_features; ++ int64_t cvq_group; ++ uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | ++ VIRTIO_CONFIG_S_DRIVER | ++ VIRTIO_CONFIG_S_FEATURES_OK; ++ int r; ++ ++ ERRP_GUARD(); ++ ++ r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); ++ if (unlikely(r < 0)) { ++ error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); ++ return r; ++ } ++ ++ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { ++ return 0; ++ } ++ ++ r = ioctl(device_fd, VHOST_SET_FEATURES, &features); ++ if (unlikely(r)) { ++ error_setg_errno(errp, errno, "Cannot set features"); ++ } ++ ++ r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); ++ if (unlikely(r)) { ++ error_setg_errno(errp, -r, "Cannot set device features"); ++ goto out; ++ } ++ ++ cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); ++ if (unlikely(cvq_group < 0)) { ++ if (cvq_group != -ENOTSUP) { ++ r = cvq_group; ++ goto out; ++ } ++ ++ /* ++ * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend ++ * support ASID even if the parent driver does not. The CVQ cannot be ++ * isolated in this case. 
++ */ ++ error_free(*errp); ++ *errp = NULL; ++ r = 0; ++ goto out; ++ } ++ ++ for (int i = 0; i < cvq_index; ++i) { ++ int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); ++ if (unlikely(group < 0)) { ++ r = group; ++ goto out; ++ } ++ ++ if (group == (int64_t)cvq_group) { ++ r = 0; ++ goto out; ++ } ++ } ++ ++ r = 1; ++ ++out: ++ status = 0; ++ ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); ++ return r; ++} ++ + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + const char *device, + const char *name, +@@ -815,16 +873,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + bool is_datapath, + bool svq, + struct vhost_vdpa_iova_range iova_range, +- uint64_t features) ++ uint64_t features, ++ Error **errp) + { + NetClientState *nc = NULL; + VhostVDPAState *s; + int ret = 0; + assert(name); ++ int cvq_isolated; ++ + if (is_datapath) { + nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, + name); + } else { ++ cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, ++ queue_pair_index * 2, ++ errp); ++ if (unlikely(cvq_isolated < 0)) { ++ return NULL; ++ } ++ + nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, + device, name); + } +@@ -851,6 +919,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; ++ s->cvq_isolated = cvq_isolated; + + /* + * TODO: We cannot migrate devices with CVQ and no x-svq enabled as +@@ -982,7 +1051,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + for (i = 0; i < queue_pairs; i++) { + ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 2, true, opts->x_svq, +- iova_range, features); ++ iova_range, features, errp); + if (!ncs[i]) + goto err; + } +@@ -990,7 +1059,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + if (has_cvq) { + nc = net_vhost_vdpa_init(peer, 
TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 1, false, +- opts->x_svq, iova_range, features); ++ opts->x_svq, iova_range, features, errp); + if (!nc) + goto err; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch b/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch deleted file mode 100644 index 7cda847..0000000 --- a/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 63a45add7c9f7bb2b7775ae4cb2d7df22f7f2033 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:39 +0100 -Subject: [PATCH 07/14] vdpa: move SVQ vring features check to net/ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/13] a24189aea4dbde3ed4486f685d0d88aeee1a0ee7 (eperezmartin/qemu-kvm) - -The next patches will start control SVQ if possible. However, we don't -know if that will be possible at qemu boot anymore. - -Since the moved checks will be already evaluated at net/ to know if it -is ok to shadow CVQ, move them. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-8-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 258a03941fd23108a322d09abc9c55341e09688d) ---- - hw/virtio/vhost-vdpa.c | 32 ++------------------------------ - net/vhost-vdpa.c | 3 ++- - 2 files changed, 4 insertions(+), 31 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 9e7cbf1776..84218ce078 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -389,29 +389,9 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, - return ret; - } - --static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, -- Error **errp) -+static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) - { - g_autoptr(GPtrArray) shadow_vqs = NULL; -- uint64_t dev_features, svq_features; -- int r; -- bool ok; -- -- if (!v->shadow_vqs_enabled) { -- return 0; -- } -- -- r = vhost_vdpa_get_dev_features(hdev, &dev_features); -- if (r != 0) { -- error_setg_errno(errp, -r, "Can't get vdpa device features"); -- return r; -- } -- -- svq_features = dev_features; -- ok = vhost_svq_valid_features(svq_features, errp); -- if (unlikely(!ok)) { -- return -1; -- } - - shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); - for (unsigned n = 0; n < hdev->nvqs; ++n) { -@@ -422,7 +402,6 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - } - - v->shadow_vqs = g_steal_pointer(&shadow_vqs); -- return 0; - } - - static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) -@@ -447,10 +426,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - dev->opaque = opaque ; - v->listener = vhost_vdpa_memory_listener; - v->msg_type = VHOST_IOTLB_MSG_V2; -- ret = vhost_vdpa_init_svq(dev, v, errp); -- if (ret) { -- goto err; -- } -+ vhost_vdpa_init_svq(dev, v); - - if (!vhost_vdpa_first_dev(dev)) { - return 0; -@@ -460,10 +436,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - VIRTIO_CONFIG_S_DRIVER); - - return 0; -- --err: 
-- ram_block_discard_disable(false); -- return ret; - } - - static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 8d3ed095d0..85aa0da39a 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -117,9 +117,10 @@ static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) - if (invalid_dev_features) { - error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, - invalid_dev_features); -+ return false; - } - -- return !invalid_dev_features; -+ return vhost_svq_valid_features(features, errp); - } - - static int vhost_vdpa_net_check_device_id(struct vhost_net *net) --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch b/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch new file mode 100644 index 0000000..4ebd8bd --- /dev/null +++ b/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch @@ -0,0 +1,134 @@ +From 09bf0febef2512f00e71edca0fcbaf452652c2c7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 10 Aug 2023 11:27:28 +0200 +Subject: [PATCH 6/7] vdpa: move vhost_vdpa_set_vring_ready to the caller +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [6/7] cf4fd1071ca127914c8e8d6aefec451cad97ecc1 (eperezmartin/qemu-kvm) + +Doing that way allows CVQ to be enabled before the dataplane vqs, +restoring the state as MQ or MAC addresses properly in the case of a +migration. + +The patch does it by defining a ->load NetClientInfo callback also for +dataplane. Ideally, this should be done by an independent patch, but +the function is already static so it would only add an empty +vhost_vdpa_net_data_load stub. 
+ +Signed-off-by: Eugenio Pérez +--- +v3: +* Fix subject typo +* Expand patch message so it explains why +--- + hw/virtio/vdpa-dev.c | 3 +++ + hw/virtio/vhost-vdpa.c | 3 --- + net/vhost-vdpa.c | 41 +++++++++++++++++++++++++++++++---------- + 3 files changed, 34 insertions(+), 13 deletions(-) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 01b41eb0f1..8c47d643bf 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -256,6 +256,9 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) + error_setg_errno(errp, -ret, "Error starting vhost"); + goto err_guest_notifiers; + } ++ for (i = 0; i < s->dev.nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(&s->vdpa, i); ++ } + s->started = true; + + /* +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index e4d0101327..0d9d311abd 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1154,9 +1154,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + if (unlikely(!ok)) { + return -1; + } +- for (int i = 0; i < dev->nvqs; ++i) { +- vhost_vdpa_set_vring_ready(v, dev->vq_index + i); +- } + } else { + vhost_vdpa_suspend(dev); + vhost_vdpa_svqs_stop(dev); +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index a1b16bbc52..47b87bf80d 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -344,6 +344,22 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) + return 0; + } + ++static int vhost_vdpa_net_data_load(NetClientState *nc) ++{ ++ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ struct vhost_vdpa *v = &s->vhost_vdpa; ++ bool has_cvq = v->dev->vq_index_end % 2; ++ ++ if (has_cvq) { ++ return 0; ++ } ++ ++ for (int i = 0; i < v->dev->nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); ++ } ++ return 0; ++} ++ + static void vhost_vdpa_net_client_stop(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +@@ -366,6 +382,7 @@ static NetClientInfo net_vhost_vdpa_info = { + .size = 
sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, + .start = vhost_vdpa_net_data_start, ++ .load = vhost_vdpa_net_data_load, + .stop = vhost_vdpa_net_client_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, +@@ -682,18 +699,22 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + +- if (!v->shadow_vqs_enabled) { +- return 0; +- } ++ vhost_vdpa_set_vring_ready(v, v->dev->vq_index); + +- n = VIRTIO_NET(v->dev->vdev); +- r = vhost_vdpa_net_load_mac(s, n); +- if (unlikely(r < 0)) { +- return r; ++ if (v->shadow_vqs_enabled) { ++ n = VIRTIO_NET(v->dev->vdev); ++ r = vhost_vdpa_net_load_mac(s, n); ++ if (unlikely(r < 0)) { ++ return r; ++ } ++ r = vhost_vdpa_net_load_mq(s, n); ++ if (unlikely(r)) { ++ return r; ++ } + } +- r = vhost_vdpa_net_load_mq(s, n); +- if (unlikely(r)) { +- return r; ++ ++ for (int i = 0; i < v->dev->vq_index; ++i) { ++ vhost_vdpa_set_vring_ready(v, i); + } + + return 0; +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch b/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch deleted file mode 100644 index 7191628..0000000 --- a/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch +++ /dev/null @@ -1,268 +0,0 @@ -From 293e249644c14b2bd19dd6a3f08a0e18ec040200 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 3 Mar 2023 18:24:32 +0100 -Subject: [PATCH 1/4] vdpa net: move iova tree creation from init to start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC -RH-Jira: RHEL-1060 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] f85a05eb528820adf4a3c0cad2950b4ab500d5fe - -JIRA: https://issues.redhat.com/browse/RHEL-1060 - -Only create iova_tree if and when it is 
needed. - -The cleanup keeps being responsible for the last VQ but this change -allows it to merge both cleanup functions. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20230303172445.1089785-2-eperezma@redhat.com> -Tested-by: Lei Yang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 00ef422e9fbfef1fb40447b08826db0951d788dd) - -Conflicts - - because of missing commit - - bf7a2ad8b6df ("vdpa: harden the error path if get_iova_range failed") - -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 113 ++++++++++++++++++++++++++++++++++------------- - 1 file changed, 83 insertions(+), 30 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1b4fec59a2..a914348e2a 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -178,7 +178,6 @@ err_init: - static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -- struct vhost_dev *dev = &s->vhost_net->dev; - - /* - * If a peer NIC is attached, do not cleanup anything. 
-@@ -190,9 +189,6 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - } - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->status); -- if (dev->vq_index + dev->nvqs == dev->vq_index_end) { -- g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -- } - if (s->vhost_net) { - vhost_net_cleanup(s->vhost_net); - g_free(s->vhost_net); -@@ -242,10 +238,64 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, - return size; - } - -+/** From any vdpa net client, get the netclient of the first queue pair */ -+static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s) -+{ -+ NICState *nic = qemu_get_nic(s->nc.peer); -+ NetClientState *nc0 = qemu_get_peer(nic->ncs, 0); -+ -+ return DO_UPCAST(VhostVDPAState, nc, nc0); -+} -+ -+static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) -+{ -+ struct vhost_vdpa *v = &s->vhost_vdpa; -+ -+ if (v->shadow_vqs_enabled) { -+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -+ v->iova_range.last); -+ } -+} -+ -+static int vhost_vdpa_net_data_start(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_vdpa *v = &s->vhost_vdpa; -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ if (v->index == 0) { -+ vhost_vdpa_net_data_start_first(s); -+ return 0; -+ } -+ -+ if (v->shadow_vqs_enabled) { -+ VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s); -+ v->iova_tree = s0->vhost_vdpa.iova_tree; -+ } -+ -+ return 0; -+} -+ -+static void vhost_vdpa_net_client_stop(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_dev *dev; -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ dev = s->vhost_vdpa.dev; -+ if (dev->vq_index + dev->nvqs == dev->vq_index_end) { -+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -+ } -+} -+ - static NetClientInfo net_vhost_vdpa_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), - 
.receive = vhost_vdpa_receive, -+ .start = vhost_vdpa_net_data_start, -+ .stop = vhost_vdpa_net_client_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, - .has_ufo = vhost_vdpa_has_ufo, -@@ -359,7 +409,7 @@ dma_map_err: - - static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { -- VhostVDPAState *s; -+ VhostVDPAState *s, *s0; - struct vhost_vdpa *v; - uint64_t backend_features; - int64_t cvq_group; -@@ -423,8 +473,6 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - return r; - } - -- v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -- v->iova_range.last); - v->shadow_vqs_enabled = true; - s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; - -@@ -433,6 +481,27 @@ out: - return 0; - } - -+ s0 = vhost_vdpa_net_first_nc_vdpa(s); -+ if (s0->vhost_vdpa.iova_tree) { -+ /* -+ * SVQ is already configured for all virtqueues. Reuse IOVA tree for -+ * simplicity, whether CVQ shares ASID with guest or not, because: -+ * - Memory listener need access to guest's memory addresses allocated -+ * in the IOVA tree. -+ * - There should be plenty of IOVA address space for both ASID not to -+ * worry about collisions between them. Guest's translations are -+ * still validated with virtio virtqueue_pop so there is no risk for -+ * the guest to access memory that it shouldn't. -+ * -+ * To allocate a iova tree per ASID is doable but it complicates the -+ * code and it is not worth it for the moment. 
-+ */ -+ v->iova_tree = s0->vhost_vdpa.iova_tree; -+ } else { -+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -+ v->iova_range.last); -+ } -+ - r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, - vhost_vdpa_net_cvq_cmd_page_len(), false); - if (unlikely(r < 0)) { -@@ -457,15 +526,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - if (s->vhost_vdpa.shadow_vqs_enabled) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); -- if (!s->always_svq) { -- /* -- * If only the CVQ is shadowed we can delete this safely. -- * If all the VQs are shadows this will be needed by the time the -- * device is started again to register SVQ vrings and similar. -- */ -- g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -- } - } -+ -+ vhost_vdpa_net_client_stop(nc); - } - - static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, -@@ -675,8 +738,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - int nvqs, - bool is_datapath, - bool svq, -- struct vhost_vdpa_iova_range iova_range, -- VhostIOVATree *iova_tree) -+ struct vhost_vdpa_iova_range iova_range) - { - NetClientState *nc = NULL; - VhostVDPAState *s; -@@ -698,7 +760,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.shadow_data = svq; -- s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), - vhost_vdpa_net_cvq_cmd_page_len()); -@@ -776,7 +837,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - uint64_t features; - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; -- g_autoptr(VhostIOVATree) iova_tree = NULL; - struct vhost_vdpa_iova_range iova_range; - NetClientState *nc; - int queue_pairs, r, i = 0, has_cvq = 0; -@@ -822,12 +882,8 @@ int 
net_init_vhost_vdpa(const Netdev *netdev, const char *name, - } - - vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); -- if (opts->x_svq) { -- if (!vhost_vdpa_net_valid_svq_features(features, errp)) { -- goto err_svq; -- } -- -- iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); -+ if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) { -+ goto err; - } - - ncs = g_malloc0(sizeof(*ncs) * queue_pairs); -@@ -835,7 +891,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true, opts->x_svq, -- iova_range, iova_tree); -+ iova_range); - if (!ncs[i]) - goto err; - } -@@ -843,13 +899,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false, -- opts->x_svq, iova_range, iova_tree); -+ opts->x_svq, iova_range); - if (!nc) - goto err; - } - -- /* iova_tree ownership belongs to last NetClientState */ -- g_steal_pointer(&iova_tree); - return 0; - - err: -@@ -859,7 +913,6 @@ err: - } - } - --err_svq: - qemu_close(vdpa_device_fd); - - return -1; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch b/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch new file mode 100644 index 0000000..9388d75 --- /dev/null +++ b/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch @@ -0,0 +1,51 @@ +From 46d5b861a39b7d0d3222162e6b7707526c131230 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 24 Mar 2023 13:28:15 +0100 +Subject: [PATCH 7/7] vdpa: remove net cvq migration blocker +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [7/7] 
9542e305c7ea3a47e0f1fe0629281238b0bb2111 (eperezmartin/qemu-kvm) + +Now that we have add migration blockers if the device does not support +all the needed features, remove the general blocker applied to all net +devices with CVQ. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +--- + net/vhost-vdpa.c | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 47b87bf80d..6e03db4afa 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -941,18 +941,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; + s->cvq_isolated = cvq_isolated; +- +- /* +- * TODO: We cannot migrate devices with CVQ and no x-svq enabled as +- * there is no way to set the device state (MAC, MQ, etc) before +- * starting the datapath. +- * +- * Migration blocker ownership now belongs to s->vhost_vdpa. +- */ +- if (!svq) { +- error_setg(&s->vhost_vdpa.migration_blocker, +- "net vdpa cannot migrate with CVQ feature"); +- } + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch b/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch new file mode 100644 index 0000000..15dc410 --- /dev/null +++ b/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch @@ -0,0 +1,49 @@ +From db7ca7692e264e8bf1bd9e08e3de7a92fc76a363 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 9 Aug 2023 18:07:26 +0200 +Subject: [PATCH 5/7] vdpa: rename vhost_vdpa_net_load to + vhost_vdpa_net_cvq_load +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [5/7] 
aea91f3274786665725af892eb905818eb0f44f1 (eperezmartin/qemu-kvm) + +Next patches will add the corresponding data load. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +--- + net/vhost-vdpa.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 29d3fd3ca6..a1b16bbc52 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -673,7 +673,7 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, + return *s->status != VIRTIO_NET_OK; + } + +-static int vhost_vdpa_net_load(NetClientState *nc) ++static int vhost_vdpa_net_cvq_load(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + struct vhost_vdpa *v = &s->vhost_vdpa; +@@ -704,7 +704,7 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { + .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, + .start = vhost_vdpa_net_cvq_start, +- .load = vhost_vdpa_net_load, ++ .load = vhost_vdpa_net_cvq_load, + .stop = vhost_vdpa_net_cvq_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch b/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch deleted file mode 100644 index 57c3f6f..0000000 --- a/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch +++ /dev/null @@ -1,84 +0,0 @@ -From af109b3c7e8d7cb3b6c7c842a92ddf5de2270a3c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 2 Jun 2023 16:38:53 +0200 -Subject: [PATCH 2/4] vdpa: reorder vhost_vdpa_net_cvq_cmd_page_len function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC -RH-Jira: RHEL-1060 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] df45d2faa25186a246c18f24909ced67f94cf33f - -JIRA: 
https://issues.redhat.com/browse/RHEL-1060 - -We need to call it from resource cleanup context, as munmap needs the -size of the mappings. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20230602143854.1879091-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 915bf6ccd7a5c9b6cbea7a72f153597d1b98834f) -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 32 ++++++++++++++++---------------- - 1 file changed, 16 insertions(+), 16 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index a914348e2a..d282c90a3d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -110,6 +110,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - return s->vhost_net; - } - -+static size_t vhost_vdpa_net_cvq_cmd_len(void) -+{ -+ /* -+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. -+ * In buffer is always 1 byte, so it should fit here -+ */ -+ return sizeof(struct virtio_net_ctrl_hdr) + -+ 2 * sizeof(struct virtio_net_ctrl_mac) + -+ MAC_TABLE_ENTRIES * ETH_ALEN; -+} -+ -+static size_t vhost_vdpa_net_cvq_cmd_page_len(void) -+{ -+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); -+} -+ - static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) - { - uint64_t invalid_dev_features = -@@ -362,22 +378,6 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - vhost_iova_tree_remove(tree, *map); - } - --static size_t vhost_vdpa_net_cvq_cmd_len(void) --{ -- /* -- * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. 
-- * In buffer is always 1 byte, so it should fit here -- */ -- return sizeof(struct virtio_net_ctrl_hdr) + -- 2 * sizeof(struct virtio_net_ctrl_mac) + -- MAC_TABLE_ENTRIES * ETH_ALEN; --} -- --static size_t vhost_vdpa_net_cvq_cmd_page_len(void) --{ -- return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); --} -- - /** Map CVQ buffer. */ - static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, - bool write) --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-request-iova_range-only-once.patch b/SOURCES/kvm-vdpa-request-iova_range-only-once.patch deleted file mode 100644 index 041e8f7..0000000 --- a/SOURCES/kvm-vdpa-request-iova_range-only-once.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 760169d538a4e6ba61006f6796cd55af967a7f1e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:38 +0100 -Subject: [PATCH 06/14] vdpa: request iova_range only once -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/13] 2a8ae2f46ae88f01c5535038f38cb7895098b610 (eperezmartin/qemu-kvm) - -Currently iova range is requested once per queue pair in the case of -net. Reduce the number of ioctls asking it once at initialization and -reusing that value for each vhost_vdpa. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221215113144.322011-7-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Jason Wang -(cherry picked from commit a585fad26b2e6ccca156d9e65158ad1c5efd268d) ---- - hw/virtio/vhost-vdpa.c | 15 --------------- - net/vhost-vdpa.c | 27 ++++++++++++++------------- - 2 files changed, 14 insertions(+), 28 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e65603022f..9e7cbf1776 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -365,19 +365,6 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) - return 0; - } - --static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) --{ -- int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE, -- &v->iova_range); -- if (ret != 0) { -- v->iova_range.first = 0; -- v->iova_range.last = UINT64_MAX; -- } -- -- trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first, -- v->iova_range.last); --} -- - /* - * The use of this function is for requests that only need to be - * applied once. Typically such request occurs at the beginning -@@ -465,8 +452,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - goto err; - } - -- vhost_vdpa_get_iova_range(v); -- - if (!vhost_vdpa_first_dev(dev)) { - return 0; - } -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 16a5ebe2dd..8d3ed095d0 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -549,14 +549,15 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { - }; - - static NetClientState *net_vhost_vdpa_init(NetClientState *peer, -- const char *device, -- const char *name, -- int vdpa_device_fd, -- int queue_pair_index, -- int nvqs, -- bool is_datapath, -- bool svq, -- VhostIOVATree *iova_tree) -+ const char *device, -+ const char *name, -+ int vdpa_device_fd, -+ int queue_pair_index, -+ int nvqs, -+ bool is_datapath, -+ bool svq, -+ struct vhost_vdpa_iova_range iova_range, -+ VhostIOVATree *iova_tree) - { - NetClientState *nc = NULL; - VhostVDPAState *s; -@@ -575,6 +576,7 @@ static NetClientState 
*net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; - s->vhost_vdpa.shadow_vqs_enabled = svq; -+ s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), -@@ -654,6 +656,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; - g_autoptr(VhostIOVATree) iova_tree = NULL; -+ struct vhost_vdpa_iova_range iova_range; - NetClientState *nc; - int queue_pairs, r, i = 0, has_cvq = 0; - -@@ -697,14 +700,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - return queue_pairs; - } - -+ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); - if (opts->x_svq) { -- struct vhost_vdpa_iova_range iova_range; -- - if (!vhost_vdpa_net_valid_svq_features(features, errp)) { - goto err_svq; - } - -- vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); - iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); - } - -@@ -713,7 +714,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true, opts->x_svq, -- iova_tree); -+ iova_range, iova_tree); - if (!ncs[i]) - goto err; - } -@@ -721,7 +722,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false, -- opts->x_svq, iova_tree); -+ opts->x_svq, iova_range, iova_tree); - if (!nc) - goto err; - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch b/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch new file mode 100644 index 0000000..c8b4913 --- /dev/null +++ b/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch @@ 
-0,0 +1,67 @@ +From 09583f39d51d16079c9fda32545d7a44b6f5c8c6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 26 May 2023 17:31:42 +0200 +Subject: [PATCH 1/7] vdpa: return errno in vhost_vdpa_get_vring_group error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [1/7] 89745b1828a1af535c40657022d385250688d11d (eperezmartin/qemu-kvm) + +We need to tell in the caller, as some errors are expected in a normal +workflow. In particular, parent drivers in recent kernels with +VHOST_BACKEND_F_IOTLB_ASID may not support vring groups. In that case, +-ENOTSUP is returned. + +This is the case of vp_vdpa in Linux 6.2. + +Next patches in this series will use that information to know if it must +abort or not. Also, next patches return properly an errp instead of +printing with error_report. + +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Signed-off-by: Eugenio Pérez +Message-Id: <20230526153143.470745-2-eperezma@redhat.com> +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + net/vhost-vdpa.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1ae839da34..801d4e0422 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -369,6 +369,14 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++/** ++ * Get vring virtqueue group ++ * ++ * @device_fd vdpa device fd ++ * @vq_index Virtqueue index ++ * ++ * Return -errno in case of error, or vq group if success. 
++ */ + static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) + { + struct vhost_vring_state state = { +@@ -377,6 +385,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) + int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); + + if (unlikely(r < 0)) { ++ r = -errno; + error_report("Cannot get VQ %u group: %s", vq_index, + g_strerror(errno)); + return r; +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-stop-all-svq-on-device-deletion.patch b/SOURCES/kvm-vdpa-stop-all-svq-on-device-deletion.patch deleted file mode 100644 index e1e4c20..0000000 --- a/SOURCES/kvm-vdpa-stop-all-svq-on-device-deletion.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 757767330abb2c0a650c387a9a5965fee224beee Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 9 Feb 2023 18:00:04 +0100 -Subject: [PATCH] vdpa: stop all svq on device deletion -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 286: vdpa: stop all svq on device deletion -RH-Bugzilla: 2213864 -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Stefano Garzarella -RH-Commit: [1/1] b9ff402e4c6e386be3ea867df9358cdaa283cda7 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2213864 -Upstream-Status: merged - -Not stopping them leave the device in a bad state when virtio-net -fronted device is unplugged with device_del monitor command. - -This is not triggable in regular poweroff or qemu forces shutdown -because cleanup is called right after vhost_vdpa_dev_start(false). But -devices hot unplug does not call vdpa device cleanups. This lead to all -the vhost_vdpa devices without stop the SVQ but the last. - -Fix it and clean the code, making it symmetric with -vhost_vdpa_svqs_start. 
- -Fixes: dff4426fa656 ("vhost: Add Shadow VirtQueue kick forwarding capabilities") -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Message-Id: <20230209170004.899472-1-eperezma@redhat.com> -Tested-by: Laurent Vivier -Acked-by: Jason Wang -(cherry picked from commit 2e1a9de96b487cf818a22d681cad8d3f5d18dcca) ---- - hw/virtio/vhost-vdpa.c | 17 ++--------------- - 1 file changed, 2 insertions(+), 15 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 72ff06673c..46896b7592 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -669,26 +669,11 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev, - return ret; - } - --static void vhost_vdpa_reset_svq(struct vhost_vdpa *v) --{ -- if (!v->shadow_vqs_enabled) { -- return; -- } -- -- for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { -- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -- vhost_svq_stop(svq); -- } --} -- - static int vhost_vdpa_reset_device(struct vhost_dev *dev) - { -- struct vhost_vdpa *v = dev->opaque; - int ret; - uint8_t status = 0; - -- vhost_vdpa_reset_svq(v); -- - ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); - trace_vhost_vdpa_reset_device(dev, status); - return ret; -@@ -1080,6 +1065,8 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -+ -+ vhost_svq_stop(svq); - vhost_vdpa_svq_unmap_rings(dev, svq); - - event_notifier_cleanup(&svq->hdev_kick); --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch b/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch deleted file mode 100644 index 68c0c86..0000000 --- a/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 28163d7d61b6b0b8312b78d57dabc8f44bf39c46 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 
12:31:42 +0100 -Subject: [PATCH 10/14] vdpa: store x-svq parameter in VhostVDPAState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/13] 53f3b2698b4a5caca434f55e4300103a78778548 (eperezmartin/qemu-kvm) - -CVQ can be shadowed two ways: -- Device has x-svq=on parameter (current way) -- The device can isolate CVQ in its own vq group - -QEMU needs to check for the second condition dynamically, because CVQ -index is not known before the driver ack the features. Since this is -dynamic, the CVQ isolation could vary with different conditions, making -it possible to go from "not isolated group" to "isolated". - -Saving the cmdline parameter in an extra field so we never disable CVQ -SVQ in case the device was started with x-svq cmdline. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-11-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 7f211a28fd5482f76583988beecd8ee61588d45e) ---- - net/vhost-vdpa.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index c2f319eb88..1757f1d028 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -38,6 +38,8 @@ typedef struct VhostVDPAState { - void *cvq_cmd_out_buffer; - virtio_net_ctrl_ack *status; - -+ /* The device always have SVQ enabled */ -+ bool always_svq; - bool started; - } VhostVDPAState; - -@@ -576,6 +578,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; -+ s->always_svq = svq; - s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.iova_tree = iova_tree; --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch b/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch new file mode 100644 index 0000000..bfb1b8e --- /dev/null +++ b/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch @@ -0,0 +1,46 @@ +From 726662aee0bc295f6931b7aba1bd68f033e949aa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 10 Aug 2023 16:08:18 +0200 +Subject: [PATCH 3/7] vdpa: use first queue SVQ state for CVQ default +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [3/7] 5c98f11b5080552a62c8e37ff2c23339455b7b86 (eperezmartin/qemu-kvm) + +Previous to this patch the only way CVQ would be shadowed is if it does +support to isolate CVQ group or if all vqs were shadowed from the +beginning. The second condition was checked at the beginning, and no +more configuration was done. 
+ +After this series we need to check if data queues are shadowed because +they are in the middle of the migration. As checking if they are +shadowed already covers the previous case, let's just mimic it. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +--- + net/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index ce17e4416a..29d3fd3ca6 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -494,7 +494,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + + s0 = vhost_vdpa_net_first_nc_vdpa(s); + v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; +- v->shadow_vqs_enabled = s->always_svq; ++ v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; + s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; + + if (s->vhost_vdpa.shadow_data) { +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch b/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch deleted file mode 100644 index 3d11438..0000000 --- a/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch +++ /dev/null @@ -1,58 +0,0 @@ -From cb974f2f9a0c5b9520b6ac80bd1d1e4a6b12bbdc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:33 +0100 -Subject: [PATCH 01/14] vdpa: use v->shadow_vqs_enabled in - vhost_vdpa_svqs_start & stop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/13] f0db50a95f87dd011418617be7b80aa6813a1146 (eperezmartin/qemu-kvm) - -This function used to trust in v->shadow_vqs != NULL to know if it must -start svq or not. 
- -This is not going to be valid anymore, as qemu is going to allocate svq -array unconditionally (but it will only start them conditionally). - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 712c1a3171cf62d501dac5af58f77d5fea70350d) ---- - hw/virtio/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index c5be2645b0..44e6a9b7b3 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1036,7 +1036,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - Error *err = NULL; - unsigned i; - -- if (!v->shadow_vqs) { -+ if (!v->shadow_vqs_enabled) { - return true; - } - -@@ -1089,7 +1089,7 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - { - struct vhost_vdpa *v = dev->opaque; - -- if (!v->shadow_vqs) { -+ if (!v->shadow_vqs_enabled) { - return; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch b/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch new file mode 100644 index 0000000..1e00427 --- /dev/null +++ b/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch @@ -0,0 +1,72 @@ +From 97124d4f2afbc8e65a3ecf76096e6b34a9b71541 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 30/37] vfio: Fix null pointer dereference bug in + vfio_bars_finalize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [28/28] 4bbdf7f9c5595897244c6cc3d88d487dd5f99bf0 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + 
+commit 8af87a3ec7e4 +Author: Avihai Horon +Date: Tue Jul 4 16:39:27 2023 +0300 + + vfio: Fix null pointer dereference bug in vfio_bars_finalize() + + vfio_realize() has the following flow: + 1. vfio_bars_prepare() -- sets VFIOBAR->size. + 2. msix_early_setup(). + 3. vfio_bars_register() -- allocates VFIOBAR->mr. + + After vfio_bars_prepare() is called msix_early_setup() can fail. If it + does fail, vfio_bars_register() is never called and VFIOBAR->mr is not + allocated. + + In this case, vfio_bars_finalize() is called as part of the error flow + to free the bars' resources. However, vfio_bars_finalize() calls + object_unparent() for VFIOBAR->mr after checking only VFIOBAR->size, and + thus we get a null pointer dereference. + + Fix it by checking VFIOBAR->mr in vfio_bars_finalize(). + + Fixes: 89d5202edc50 ("vfio/pci: Allow relocating MSI-X MMIO") + Signed-off-by: Avihai Horon + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index ba40ca8784..9189459a38 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -1755,9 +1755,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) + + vfio_bar_quirk_finalize(vdev, i); + vfio_region_finalize(&bar->region); +- if (bar->size) { ++ if (bar->mr) { ++ assert(bar->size); + object_unparent(OBJECT(bar->mr)); + g_free(bar->mr); ++ bar->mr = NULL; + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch b/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch new file mode 100644 index 0000000..78a554d --- /dev/null +++ b/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch @@ -0,0 +1,196 @@ +From f68e8c5d841cd7fc785cc3d15b3c280211bfb4c3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 
17:46:57 +0200 +Subject: [PATCH 17/37] vfio: Implement a common device info helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [15/28] 9cfd233ab1b95dc7de776e8ef901823bd37c5a6b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 634f38f0f73f +Author: Alex Williamson +Date: Thu Jun 1 08:45:06 2023 -0600 + + vfio: Implement a common device info helper + + A common helper implementing the realloc algorithm for handling + capabilities. + + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Signed-off-by: Alex Williamson + Reviewed-by: Robin Voetter + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 37 ++++------------------------ + hw/vfio/common.c | 46 ++++++++++++++++++++++++++--------- + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 41 insertions(+), 43 deletions(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index f51190d466..59a2e03873 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -289,38 +289,11 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, + memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); + } + +-static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, +- uint32_t argsz) ++static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) + { +- struct vfio_device_info *info = g_malloc0(argsz); +- VFIOPCIDevice *vfio_pci; +- int fd; ++ VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + +- vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); +- fd = vfio_pci->vbasedev.fd; +- +- /* +- * If the specified argsz is not large enough to contain all capabilities +- * it will be updated upon return from the 
ioctl. Retry until we have +- * a big enough buffer to hold the entire capability chain. On error, +- * just exit and rely on CLP defaults. +- */ +-retry: +- info->argsz = argsz; +- +- if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { +- trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); +- g_free(info); +- return NULL; +- } +- +- if (info->argsz > argsz) { +- argsz = info->argsz; +- info = g_realloc(info, argsz); +- goto retry; +- } +- +- return info; ++ return vfio_get_device_info(vfio_pci->vbasedev.fd); + } + + /* +@@ -335,7 +308,7 @@ bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) + + assert(fh); + +- info = get_device_info(pbdev, sizeof(*info)); ++ info = get_device_info(pbdev); + if (!info) { + return false; + } +@@ -356,7 +329,7 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) + { + g_autofree struct vfio_device_info *info = NULL; + +- info = get_device_info(pbdev, sizeof(*info)); ++ info = get_device_info(pbdev); + if (!info) { + return; + } +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b73086e17a..3b4ac53f15 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -2845,11 +2845,35 @@ void vfio_put_group(VFIOGroup *group) + } + } + ++struct vfio_device_info *vfio_get_device_info(int fd) ++{ ++ struct vfio_device_info *info; ++ uint32_t argsz = sizeof(*info); ++ ++ info = g_malloc0(argsz); ++ ++retry: ++ info->argsz = argsz; ++ ++ if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { ++ g_free(info); ++ return NULL; ++ } ++ ++ if (info->argsz > argsz) { ++ argsz = info->argsz; ++ info = g_realloc(info, argsz); ++ goto retry; ++ } ++ ++ return info; ++} ++ + int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp) + { +- struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; +- int ret, fd; ++ g_autofree struct vfio_device_info *info = NULL; ++ int fd; + + fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + if (fd < 0) { +@@ -2861,11 +2885,11 @@ int vfio_get_device(VFIOGroup 
*group, const char *name, + return fd; + } + +- ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info); +- if (ret) { ++ info = vfio_get_device_info(fd); ++ if (!info) { + error_setg_errno(errp, errno, "error getting device info"); + close(fd); +- return ret; ++ return -1; + } + + /* +@@ -2893,14 +2917,14 @@ int vfio_get_device(VFIOGroup *group, const char *name, + vbasedev->group = group; + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); + +- vbasedev->num_irqs = dev_info.num_irqs; +- vbasedev->num_regions = dev_info.num_regions; +- vbasedev->flags = dev_info.flags; ++ vbasedev->num_irqs = info->num_irqs; ++ vbasedev->num_regions = info->num_regions; ++ vbasedev->flags = info->flags; ++ ++ trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs); + +- trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions, +- dev_info.num_irqs); ++ vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); + +- vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); + return 0; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 3dc5f2104c..6d1b8487c3 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -216,6 +216,7 @@ void vfio_region_finalize(VFIORegion *region); + void vfio_reset_handler(void *opaque); + VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); + void vfio_put_group(VFIOGroup *group); ++struct vfio_device_info *vfio_get_device_info(int fd); + int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp); + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch b/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch new file mode 100644 index 0000000..b8e72e6 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch @@ -0,0 +1,438 @@ +From 080d28c191b7d951f1f4596dcaa13d590c07d886 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 15/37] vfio/migration: Add VFIO migration pre-copy support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/28] 7b2ea1471440d47e5aed1211c96942ca7bface96 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit eda7362af959 +Author: Avihai Horon +Date: Wed Jun 21 14:12:00 2023 +0300 + + vfio/migration: Add VFIO migration pre-copy support + + Pre-copy support allows the VFIO device data to be transferred while the + VM is running. This helps to accommodate VFIO devices that have a large + amount of data that needs to be transferred, and it can reduce migration + downtime. + + Pre-copy support is optional in VFIO migration protocol v2. + Implement pre-copy of VFIO migration protocol v2 and use it for devices + that support it. Full description of it can be found in the following + Linux commit: 4db52602a607 ("vfio: Extend the device migration protocol + with PRE_COPY"). + + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + docs/devel/vfio-migration.rst | 35 +++++--- + hw/vfio/common.c | 6 +- + hw/vfio/migration.c | 165 ++++++++++++++++++++++++++++++++-- + hw/vfio/trace-events | 4 +- + include/hw/vfio/vfio-common.h | 2 + + 5 files changed, 190 insertions(+), 22 deletions(-) + +diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst +index 1b68ccf115..e896b2a673 100644 +--- a/docs/devel/vfio-migration.rst ++++ b/docs/devel/vfio-migration.rst +@@ -7,12 +7,14 @@ the guest is running on source host and restoring this saved state on the + destination host. 
This document details how saving and restoring of VFIO + devices is done in QEMU. + +-Migration of VFIO devices currently consists of a single stop-and-copy phase. +-During the stop-and-copy phase the guest is stopped and the entire VFIO device +-data is transferred to the destination. +- +-The pre-copy phase of migration is currently not supported for VFIO devices. +-Support for VFIO pre-copy will be added later on. ++Migration of VFIO devices consists of two phases: the optional pre-copy phase, ++and the stop-and-copy phase. The pre-copy phase is iterative and allows to ++accommodate VFIO devices that have a large amount of data that needs to be ++transferred. The iterative pre-copy phase of migration allows for the guest to ++continue whilst the VFIO device state is transferred to the destination, this ++helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy ++support by reporting the VFIO_MIGRATION_PRE_COPY flag in the ++VFIO_DEVICE_FEATURE_MIGRATION ioctl. + + Note that currently VFIO migration is supported only for a single device. This + is due to VFIO migration's lack of P2P support. However, P2P support is planned +@@ -29,10 +31,20 @@ VFIO implements the device hooks for the iterative approach as follows: + * A ``load_setup`` function that sets the VFIO device on the destination in + _RESUMING state. + ++* A ``state_pending_estimate`` function that reports an estimate of the ++ remaining pre-copy data that the vendor driver has yet to save for the VFIO ++ device. ++ + * A ``state_pending_exact`` function that reads pending_bytes from the vendor + driver, which indicates the amount of data that the vendor driver has yet to + save for the VFIO device. + ++* An ``is_active_iterate`` function that indicates ``save_live_iterate`` is ++ active only when the VFIO device is in pre-copy states. ++ ++* A ``save_live_iterate`` function that reads the VFIO device's data from the ++ vendor driver during iterative pre-copy phase. 
++ + * A ``save_state`` function to save the device config space if it is present. + + * A ``save_live_complete_precopy`` function that sets the VFIO device in +@@ -111,8 +123,10 @@ Flow of state changes during Live migration + =========================================== + + Below is the flow of state change during live migration. +-The values in the brackets represent the VM state, the migration state, and ++The values in the parentheses represent the VM state, the migration state, and + the VFIO device state, respectively. ++The text in the square brackets represents the flow if the VFIO device supports ++pre-copy. + + Live migration save path + ------------------------ +@@ -124,11 +138,12 @@ Live migration save path + | + migrate_init spawns migration_thread + Migration thread then calls each device's .save_setup() +- (RUNNING, _SETUP, _RUNNING) ++ (RUNNING, _SETUP, _RUNNING [_PRE_COPY]) + | +- (RUNNING, _ACTIVE, _RUNNING) +- If device is active, get pending_bytes by .state_pending_exact() ++ (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY]) ++ If device is active, get pending_bytes by .state_pending_{estimate,exact}() + If total pending_bytes >= threshold_size, call .save_live_iterate() ++ [Data of VFIO device for pre-copy phase is copied] + Iterate till total pending bytes converge and are less than threshold + | + On migration completion, vCPU stops and calls .save_live_complete_precopy for +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 78358ede27..b73086e17a 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -492,7 +492,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + } + + if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && +- migration->device_state == VFIO_DEVICE_STATE_RUNNING) { ++ (migration->device_state == VFIO_DEVICE_STATE_RUNNING || ++ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) { + return false; + } + } +@@ -537,7 +538,8 @@ static bool 
vfio_devices_all_running_and_mig_active(VFIOContainer *container) + return false; + } + +- if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) { ++ if (migration->device_state == VFIO_DEVICE_STATE_RUNNING || ++ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { + continue; + } else { + return false; +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 8d33414379..d8f6a22ae1 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -68,6 +68,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state) + return "STOP_COPY"; + case VFIO_DEVICE_STATE_RESUMING: + return "RESUMING"; ++ case VFIO_DEVICE_STATE_PRE_COPY: ++ return "PRE_COPY"; + default: + return "UNKNOWN STATE"; + } +@@ -241,6 +243,25 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, + return 0; + } + ++static int vfio_query_precopy_size(VFIOMigration *migration) ++{ ++ struct vfio_precopy_info precopy = { ++ .argsz = sizeof(precopy), ++ }; ++ ++ migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; ++ ++ if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) { ++ return -errno; ++ } ++ ++ migration->precopy_init_size = precopy.initial_bytes; ++ migration->precopy_dirty_size = precopy.dirty_bytes; ++ ++ return 0; ++} ++ + /* Returns the size of saved data on success and -errno on error */ + static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + { +@@ -249,6 +270,14 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + data_size = read(migration->data_fd, migration->data_buffer, + migration->data_buffer_size); + if (data_size < 0) { ++ /* ++ * Pre-copy emptied all the device state for now. For more information, ++ * please refer to the Linux kernel VFIO uAPI. 
++ */ ++ if (errno == ENOMSG) { ++ return 0; ++ } ++ + return -errno; + } + if (data_size == 0) { +@@ -265,6 +294,38 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + return qemu_file_get_error(f) ?: data_size; + } + ++static void vfio_update_estimated_pending_data(VFIOMigration *migration, ++ uint64_t data_size) ++{ ++ if (!data_size) { ++ /* ++ * Pre-copy emptied all the device state for now, update estimated sizes ++ * accordingly. ++ */ ++ migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; ++ ++ return; ++ } ++ ++ if (migration->precopy_init_size) { ++ uint64_t init_size = MIN(migration->precopy_init_size, data_size); ++ ++ migration->precopy_init_size -= init_size; ++ data_size -= init_size; ++ } ++ ++ migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size, ++ data_size); ++} ++ ++static bool vfio_precopy_supported(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ return migration->mig_flags & VFIO_MIGRATION_PRE_COPY; ++} ++ + /* ---------------------------------------------------------------------- */ + + static int vfio_save_setup(QEMUFile *f, void *opaque) +@@ -285,6 +346,28 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) + return -ENOMEM; + } + ++ if (vfio_precopy_supported(vbasedev)) { ++ int ret; ++ ++ switch (migration->device_state) { ++ case VFIO_DEVICE_STATE_RUNNING: ++ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY, ++ VFIO_DEVICE_STATE_RUNNING); ++ if (ret) { ++ return ret; ++ } ++ ++ vfio_query_precopy_size(migration); ++ ++ break; ++ case VFIO_DEVICE_STATE_STOP: ++ /* vfio_save_complete_precopy() will go to STOP_COPY */ ++ break; ++ default: ++ return -EINVAL; ++ } ++ } ++ + trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); +@@ -299,26 +382,42 @@ static void vfio_save_cleanup(void *opaque) + + g_free(migration->data_buffer); + migration->data_buffer = NULL; ++ 
migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; + vfio_migration_cleanup(vbasedev); + trace_vfio_save_cleanup(vbasedev->name); + } + ++static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy, ++ uint64_t *can_postcopy) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) { ++ return; ++ } ++ ++ *must_precopy += ++ migration->precopy_init_size + migration->precopy_dirty_size; ++ ++ trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy, ++ *can_postcopy, ++ migration->precopy_init_size, ++ migration->precopy_dirty_size); ++} ++ + /* + * Migration size of VFIO devices can be as little as a few KBs or as big as + * many GBs. This value should be big enough to cover the worst case. + */ + #define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) + +-/* +- * Only exact function is implemented and not estimate function. The reason is +- * that during pre-copy phase of migration the estimate function is called +- * repeatedly while pending RAM size is over the threshold, thus migration +- * can't converge and querying the VFIO device pending data size is useless. 
+- */ + static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + uint64_t *can_postcopy) + { + VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; + uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; + + /* +@@ -328,8 +427,48 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + vfio_query_stop_copy_size(vbasedev, &stop_copy_size); + *must_precopy += stop_copy_size; + ++ if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { ++ vfio_query_precopy_size(migration); ++ ++ *must_precopy += ++ migration->precopy_init_size + migration->precopy_dirty_size; ++ } ++ + trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, +- stop_copy_size); ++ stop_copy_size, migration->precopy_init_size, ++ migration->precopy_dirty_size); ++} ++ ++static bool vfio_is_active_iterate(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ++ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; ++} ++ ++static int vfio_save_iterate(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ssize_t data_size; ++ ++ data_size = vfio_save_block(f, migration); ++ if (data_size < 0) { ++ return data_size; ++ } ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ vfio_update_estimated_pending_data(migration, data_size); ++ ++ trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, ++ migration->precopy_dirty_size); ++ ++ /* ++ * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero. ++ * Return 1 so following handlers will not be potentially blocked. 
++ */ ++ return 1; + } + + static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) +@@ -338,7 +477,7 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + ssize_t data_size; + int ret; + +- /* We reach here with device state STOP only */ ++ /* We reach here with device state STOP or STOP_COPY only */ + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, + VFIO_DEVICE_STATE_STOP); + if (ret) { +@@ -457,7 +596,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + static const SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, ++ .state_pending_estimate = vfio_state_pending_estimate, + .state_pending_exact = vfio_state_pending_exact, ++ .is_active_iterate = vfio_is_active_iterate, ++ .save_live_iterate = vfio_save_iterate, + .save_live_complete_precopy = vfio_save_complete_precopy, + .save_state = vfio_save_state, + .load_setup = vfio_load_setup, +@@ -470,13 +612,18 @@ static const SaveVMHandlers savevm_vfio_handlers = { + static void vfio_vmstate_change(void *opaque, bool running, RunState state) + { + VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; + enum vfio_device_mig_state new_state; + int ret; + + if (running) { + new_state = VFIO_DEVICE_STATE_RUNNING; + } else { +- new_state = VFIO_DEVICE_STATE_STOP; ++ new_state = ++ (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY && ++ (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ? 
++ VFIO_DEVICE_STATE_STOP_COPY : ++ VFIO_DEVICE_STATE_STOP; + } + + /* +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 646e42fd27..4150b59e58 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -162,6 +162,8 @@ vfio_save_block(const char *name, int data_size) " (%s) data_size %d" + vfio_save_cleanup(const char *name) " (%s)" + vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" + vfio_save_device_config_state(const char *name) " (%s)" ++vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 +-vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 ++vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 ++vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 5f29dab839..1db901c194 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -67,6 +67,8 @@ typedef struct VFIOMigration { + void *data_buffer; + size_t data_buffer_size; + uint64_t mig_flags; ++ uint64_t precopy_init_size; ++ uint64_t precopy_dirty_size; + } 
VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch b/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch new file mode 100644 index 0000000..d87680d --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch @@ -0,0 +1,192 @@ +From 169dc1bb051b3aebc571936d956b49ba0621ae43 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 16/37] vfio/migration: Add support for switchover ack + capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [14/28] b3bd2eb2d0ca49ff05a0a82ae5bb956a354aed47 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 745c42912a04 +Author: Avihai Horon +Date: Wed Jun 21 14:12:01 2023 +0300 + + vfio/migration: Add support for switchover ack capability + + Loading of a VFIO device's data can take a substantial amount of time as + the device may need to allocate resources, prepare internal data + structures, etc. This can increase migration downtime, especially for + VFIO devices with a lot of resources. + + To solve this, VFIO migration uAPI defines "initial bytes" as part of + its precopy data stream. Initial bytes can be used in various ways to + improve VFIO migration performance. For example, it can be used to + transfer device metadata to pre-allocate resources in the destination. + However, for this to work we need to make sure that all initial bytes + are sent and loaded in the destination before the source VM is stopped. 
+ + Use migration switchover ack capability to make sure a VFIO device's + initial bytes are sent and loaded in the destination before the source + stops the VM and attempts to complete the migration. + This can significantly reduce migration downtime for some devices. + + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + docs/devel/vfio-migration.rst | 10 +++++++++ + hw/vfio/migration.c | 39 ++++++++++++++++++++++++++++++++++- + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 49 insertions(+), 1 deletion(-) + +diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst +index e896b2a673..b433cb5bb2 100644 +--- a/docs/devel/vfio-migration.rst ++++ b/docs/devel/vfio-migration.rst +@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy + support by reporting the VFIO_MIGRATION_PRE_COPY flag in the + VFIO_DEVICE_FEATURE_MIGRATION ioctl. + ++When pre-copy is supported, it's possible to further reduce downtime by ++enabling "switchover-ack" migration capability. ++VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream ++and recommends that the initial bytes are sent and loaded in the destination ++before stopping the source VM. Enabling this migration capability will ++guarantee that and thus, can potentially reduce downtime even further. ++ + Note that currently VFIO migration is supported only for a single device. This + is due to VFIO migration's lack of P2P support. However, P2P support is planned + to be added later on. +@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach as follows: + * A ``save_live_iterate`` function that reads the VFIO device's data from the + vendor driver during iterative pre-copy phase. 
+ ++* A ``switchover_ack_needed`` function that checks if the VFIO device uses ++ "switchover-ack" migration capability when this capability is enabled. ++ + * A ``save_state`` function to save the device config space if it is present. + + * A ``save_live_complete_precopy`` function that sets the VFIO device in +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index d8f6a22ae1..acbf0bb7ab 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -18,6 +18,8 @@ + #include "sysemu/runstate.h" + #include "hw/vfio/vfio-common.h" + #include "migration/migration.h" ++#include "migration/options.h" ++#include "migration/savevm.h" + #include "migration/vmstate.h" + #include "migration/qemu-file.h" + #include "migration/register.h" +@@ -45,6 +47,7 @@ + #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) + #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) + #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) ++#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) + + /* + * This is an arbitrary size based on migration of mlx5 devices, where typically +@@ -384,6 +387,7 @@ static void vfio_save_cleanup(void *opaque) + migration->data_buffer = NULL; + migration->precopy_init_size = 0; + migration->precopy_dirty_size = 0; ++ migration->initial_data_sent = false; + vfio_migration_cleanup(vbasedev); + trace_vfio_save_cleanup(vbasedev->name); + } +@@ -457,10 +461,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) + if (data_size < 0) { + return data_size; + } +- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + vfio_update_estimated_pending_data(migration, data_size); + ++ if (migrate_switchover_ack() && !migration->precopy_init_size && ++ !migration->initial_data_sent) { ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT); ++ migration->initial_data_sent = true; ++ } else { ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ } ++ + trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, + 
migration->precopy_dirty_size); + +@@ -579,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + } + break; + } ++ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT: ++ { ++ if (!vfio_precopy_supported(vbasedev) || ++ !migrate_switchover_ack()) { ++ error_report("%s: Received INIT_DATA_SENT but switchover ack " ++ "is not used", vbasedev->name); ++ return -EINVAL; ++ } ++ ++ ret = qemu_loadvm_approve_switchover(); ++ if (ret) { ++ error_report( ++ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)", ++ vbasedev->name, ret, strerror(-ret)); ++ } ++ ++ return ret; ++ } + default: + error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); + return -EINVAL; +@@ -593,6 +622,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + return ret; + } + ++static bool vfio_switchover_ack_needed(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ return vfio_precopy_supported(vbasedev); ++} ++ + static const SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, +@@ -605,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { + .load_setup = vfio_load_setup, + .load_cleanup = vfio_load_cleanup, + .load_state = vfio_load_state, ++ .switchover_ack_needed = vfio_switchover_ack_needed, + }; + + /* ---------------------------------------------------------------------- */ +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 1db901c194..3dc5f2104c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -69,6 +69,7 @@ typedef struct VFIOMigration { + uint64_t mig_flags; + uint64_t precopy_init_size; + uint64_t precopy_dirty_size; ++ bool initial_data_sent; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch b/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch new file mode 100644 index 
0000000..da43b4b --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch @@ -0,0 +1,90 @@ +From df7814de08c8c7c45eacb7b9d9ead9be4d1e3baf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 11 Sep 2023 16:10:19 +0200 +Subject: [PATCH 4/4] vfio/migration: Block VFIO migration with postcopy + migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled +RH-Bugzilla: 2229868 +RH-Acked-by: Alex Williamson +RH-Acked-by: Peter Xu +RH-Commit: [4/4] 36eedf879547044c2ba2763fb48784a95f9e4ea7 + +Bugzilla: https://bugzilla.redhat.com/2229868 + +commit bf7ef7a2da3e61dc104f26c679c9465e3fbe7dde +Author: Avihai Horon +Date: Wed Sep 6 18:08:52 2023 +0300 + + vfio/migration: Block VFIO migration with postcopy migration + + VFIO migration is not compatible with postcopy migration. A VFIO device + in the destination can't handle page faults for pages that have not been + sent yet. + + Doing such migration will cause the VM to crash in the destination: + + qemu-system-x86_64: VFIO_MAP_DMA failed: Bad address + qemu-system-x86_64: vfio_dma_map(0x55a28c7659d0, 0xc0000, 0xb000, 0x7f1b11a00000) = -14 (Bad address) + qemu: hardware error: vfio: DMA mapping failed, unable to continue + + To prevent this, block VFIO migration with postcopy migration. 
+ + Reported-by: Yanghang Liu + Signed-off-by: Avihai Horon + Tested-by: Yanghang Liu + Reviewed-by: Peter Xu + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 2674f4bc47..4f018c7531 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -331,6 +331,27 @@ static bool vfio_precopy_supported(VFIODevice *vbasedev) + + /* ---------------------------------------------------------------------- */ + ++static int vfio_save_prepare(void *opaque, Error **errp) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ /* ++ * Snapshot doesn't use postcopy, so allow snapshot even if postcopy is on. ++ */ ++ if (runstate_check(RUN_STATE_SAVE_VM)) { ++ return 0; ++ } ++ ++ if (migrate_postcopy_ram()) { ++ error_setg( ++ errp, "%s: VFIO migration is not supported with postcopy migration", ++ vbasedev->name); ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ + static int vfio_save_setup(QEMUFile *f, void *opaque) + { + VFIODevice *vbasedev = opaque; +@@ -630,6 +651,7 @@ static bool vfio_switchover_ack_needed(void *opaque) + } + + static const SaveVMHandlers savevm_vfio_handlers = { ++ .save_prepare = vfio_save_prepare, + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, + .state_pending_estimate = vfio_state_pending_estimate, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch b/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch new file mode 100644 index 0000000..dde2e24 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch @@ -0,0 +1,171 @@ +From 35c7d0d3b02d61d6f29afae74bd83edd70a6a1b4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 26/37] vfio/migration: Change vIOMMU blocker from global to + per device 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [24/28] 8fda1c82a81fadd4f38e6a5e878c9228a81c0f6e (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 3c26c80a0a26 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:07 2023 +0800 + + vfio/migration: Change vIOMMU blocker from global to per device + + Contrary to multiple device blocker which needs to consider already-attached + devices to unblock/block dynamically, the vIOMMU migration blocker is a device + specific config. Meaning it only needs to know whether the device is bypassing + or not the vIOMMU (via machine property, or per pxb-pcie::bypass_iommu), and + does not need the state of currently present devices. For this reason, the + vIOMMU global migration blocker can be consolidated into the per-device + migration blocker, allowing us to remove some unnecessary code. + + This change also makes vfio_mig_active() more accurate as it doesn't check for + global blocker. 
+ + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 51 ++--------------------------------- + hw/vfio/migration.c | 7 ++--- + hw/vfio/pci.c | 1 - + include/hw/vfio/vfio-common.h | 3 +-- + 4 files changed, 7 insertions(+), 55 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 136d8243d6..e815f6ba30 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -362,7 +362,6 @@ bool vfio_mig_active(void) + } + + static Error *multiple_devices_migration_blocker; +-static Error *giommu_migration_blocker; + + static unsigned int vfio_migratable_device_num(void) + { +@@ -420,55 +419,9 @@ void vfio_unblock_multiple_devices_migration(void) + multiple_devices_migration_blocker = NULL; + } + +-static bool vfio_viommu_preset(void) ++bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- VFIOAddressSpace *space; +- +- QLIST_FOREACH(space, &vfio_address_spaces, list) { +- if (space->as != &address_space_memory) { +- return true; +- } +- } +- +- return false; +-} +- +-int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) +-{ +- int ret; +- +- if (giommu_migration_blocker || +- !vfio_viommu_preset()) { +- return 0; +- } +- +- if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { +- error_setg(errp, +- "Migration is currently not supported with vIOMMU enabled"); +- return -EINVAL; +- } +- +- error_setg(&giommu_migration_blocker, +- "Migration is currently not supported with vIOMMU enabled"); +- ret = migrate_add_blocker(giommu_migration_blocker, errp); +- if (ret < 0) { +- error_free(giommu_migration_blocker); +- giommu_migration_blocker = NULL; +- } +- +- return ret; +-} +- +-void vfio_migration_finalize(void) +-{ +- if (!giommu_migration_blocker || +- vfio_viommu_preset()) { +- return; +- } +- +- migrate_del_blocker(giommu_migration_blocker); +- error_free(giommu_migration_blocker); +- giommu_migration_blocker = NULL; 
++ return vbasedev->group->container->space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 1db7d52ab2..e6e5e85f75 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -878,9 +878,10 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + return ret; + } + +- ret = vfio_block_giommu_migration(vbasedev, errp); +- if (ret) { +- return ret; ++ if (vfio_viommu_preset(vbasedev)) { ++ error_setg(&err, "%s: Migration is currently not supported " ++ "with vIOMMU enabled", vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); + } + + trace_vfio_migration_realize(vbasedev->name); +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 2d059832a4..922c81872c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3279,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj) + */ + vfio_put_device(vdev); + vfio_put_group(group); +- vfio_migration_finalize(); + } + + static void vfio_exitfn(PCIDevice *pdev) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 93429b9abb..45167c8a8a 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -227,7 +227,7 @@ extern VFIOGroupList vfio_group_list; + bool vfio_mig_active(void); + int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); + void vfio_unblock_multiple_devices_migration(void); +-int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); ++bool vfio_viommu_preset(VFIODevice *vbasedev); + int64_t vfio_mig_bytes_transferred(void); + void vfio_reset_bytes_transferred(void); + +@@ -254,6 +254,5 @@ int vfio_spapr_remove_window(VFIOContainer *container, + + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); +-void vfio_migration_finalize(void); + + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git 
a/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch b/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch new file mode 100644 index 0000000..9deaf1a --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch @@ -0,0 +1,145 @@ +From a36fa46369fe9bf2a2174e9ed6ab83042e904066 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 27/37] vfio/migration: Free resources when + vfio_migration_realize fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [25/28] b3ab8d3443d4bc12a689dc7d88a94da315814bb7 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 2b43b2995b02 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:08 2023 +0800 + + vfio/migration: Free resources when vfio_migration_realize fails + + When vfio_realize() succeeds, hot unplug will call vfio_exitfn() + to free resources allocated in vfio_realize(); when vfio_realize() + fails, vfio_exitfn() is never called and we need to free resources + in vfio_realize(). + + In the case that vfio_migration_realize() fails, + e.g. with -only-migratable & enable-migration=off, we see below: + + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off + 0000:81:11.1: Migration disabled + Error: disallowing migration blocker (--only-migratable) for: 0000:81:11.1: Migration is disabled for VFIO device + + If we hotplug again we should see the same log as above, but we see: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off + Error: vfio 0000:81:11.1: device is already attached + + That's because some references to the VFIO device aren't released. 
+ For resources allocated in vfio_migration_realize(), free them by + jumping to out_deinit path with calling a new function + vfio_migration_deinit(). For resources allocated in vfio_realize(), + free them by jumping to de-register path in vfio_realize(). + + Signed-off-by: Zhenzhong Duan + Fixes: a22651053b59 ("vfio: Make vfio-pci device migration capable") + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 33 +++++++++++++++++++++++---------- + hw/vfio/pci.c | 1 + + 2 files changed, 24 insertions(+), 10 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index e6e5e85f75..e3954570c8 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev) + return 0; + } + ++static void vfio_migration_deinit(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ remove_migration_state_change_notifier(&migration->migration_state); ++ qemu_del_vm_change_state_handler(migration->vm_state); ++ unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); ++ vfio_migration_free(vbasedev); ++ vfio_unblock_multiple_devices_migration(); ++} ++ + static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) + { + int ret; +@@ -866,7 +877,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + error_setg(&err, + "%s: VFIO device doesn't support device dirty tracking", + vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ goto add_blocker; + } + + warn_report("%s: VFIO device doesn't support device dirty tracking", +@@ -875,29 +886,31 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + + ret = vfio_block_multiple_devices_migration(vbasedev, errp); + if (ret) { +- return ret; ++ goto out_deinit; + } + + if (vfio_viommu_preset(vbasedev)) { + error_setg(&err, "%s: Migration is currently not 
supported " + "with vIOMMU enabled", vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ goto add_blocker; + } + + trace_vfio_migration_realize(vbasedev->name); + return 0; ++ ++add_blocker: ++ ret = vfio_block_migration(vbasedev, err, errp); ++out_deinit: ++ if (ret) { ++ vfio_migration_deinit(vbasedev); ++ } ++ return ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) + { + if (vbasedev->migration) { +- VFIOMigration *migration = vbasedev->migration; +- +- remove_migration_state_change_notifier(&migration->migration_state); +- qemu_del_vm_change_state_handler(migration->vm_state); +- unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); +- vfio_migration_free(vbasedev); +- vfio_unblock_multiple_devices_migration(); ++ vfio_migration_deinit(vbasedev); + } + + if (vbasedev->migration_blocker) { +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 922c81872c..037b7d4176 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3234,6 +3234,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ret = vfio_migration_realize(vbasedev, errp); + if (ret) { + error_report("%s: Migration disabled", vbasedev->name); ++ goto out_deregister; + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch b/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch new file mode 100644 index 0000000..3258541 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch @@ -0,0 +1,283 @@ +From 747c34c0a3b8048ebdab387d22f2b922c81d572a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 21/37] vfio/migration: Make VFIO migration non-experimental +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: 
Miroslav Rezanina +RH-Commit: [19/28] 2f457c1c0de95a3fced0270f2edbbc5193cc4de9 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 8bbcb64a71d8 +Author: Avihai Horon +Date: Wed Jun 28 10:31:12 2023 +0300 + + vfio/migration: Make VFIO migration non-experimental + + The major parts of VFIO migration are supported today in QEMU. This + includes basic VFIO migration, device dirty page tracking and precopy + support. + + Thus, at this point in time, it seems appropriate to make VFIO migration + non-experimental: remove the x prefix from enable_migration property, + change it to ON_OFF_AUTO and let the default value be AUTO. + + In addition, make the following adjustments: + 1. When enable_migration is ON and migration is not supported, fail VFIO + device realization. + 2. When enable_migration is AUTO (i.e., not explicitly enabled), require + device dirty tracking support. This is because device dirty tracking + is currently the only method to do dirty page tracking, which is + essential for migrating in a reasonable downtime. Setting + enable_migration to ON will not require device dirty tracking. + 3. Make migration error and blocker messages more elaborate. + 4. Remove error prints in vfio_migration_query_flags(). + 5. Rename trace_vfio_migration_probe() to + trace_vfio_migration_realize(). 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 16 ++++++- + hw/vfio/migration.c | 79 +++++++++++++++++++++++------------ + hw/vfio/pci.c | 4 +- + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 6 +-- + 5 files changed, 73 insertions(+), 34 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 3b4ac53f15..136d8243d6 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -381,7 +381,7 @@ static unsigned int vfio_migratable_device_num(void) + return device_num; + } + +-int vfio_block_multiple_devices_migration(Error **errp) ++int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) + { + int ret; + +@@ -390,6 +390,12 @@ int vfio_block_multiple_devices_migration(Error **errp) + return 0; + } + ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_setg(errp, "Migration is currently not supported with multiple " ++ "VFIO devices"); ++ return -EINVAL; ++ } ++ + error_setg(&multiple_devices_migration_blocker, + "Migration is currently not supported with multiple " + "VFIO devices"); +@@ -427,7 +433,7 @@ static bool vfio_viommu_preset(void) + return false; + } + +-int vfio_block_giommu_migration(Error **errp) ++int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) + { + int ret; + +@@ -436,6 +442,12 @@ int vfio_block_giommu_migration(Error **errp) + return 0; + } + ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_setg(errp, ++ "Migration is currently not supported with vIOMMU enabled"); ++ return -EINVAL; ++ } ++ + error_setg(&giommu_migration_blocker, + "Migration is currently not supported with vIOMMU enabled"); + ret = migrate_add_blocker(giommu_migration_blocker, errp); +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 7cf143926c..1db7d52ab2 100644 +--- a/hw/vfio/migration.c ++++ 
b/hw/vfio/migration.c +@@ -724,14 +724,6 @@ static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { +- if (errno == ENOTTY) { +- error_report("%s: VFIO migration is not supported in kernel", +- vbasedev->name); +- } else { +- error_report("%s: Failed to query VFIO migration support, err: %s", +- vbasedev->name, strerror(errno)); +- } +- + return -errno; + } + +@@ -810,6 +802,27 @@ static int vfio_migration_init(VFIODevice *vbasedev) + return 0; + } + ++static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) ++{ ++ int ret; ++ ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_propagate(errp, err); ++ return -EINVAL; ++ } ++ ++ vbasedev->migration_blocker = error_copy(err); ++ error_free(err); ++ ++ ret = migrate_add_blocker(vbasedev->migration_blocker, errp); ++ if (ret < 0) { ++ error_free(vbasedev->migration_blocker); ++ vbasedev->migration_blocker = NULL; ++ } ++ ++ return ret; ++} ++ + /* ---------------------------------------------------------------------- */ + + int64_t vfio_mig_bytes_transferred(void) +@@ -824,40 +837,54 @@ void vfio_reset_bytes_transferred(void) + + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { +- int ret = -ENOTSUP; ++ Error *err = NULL; ++ int ret; + +- if (!vbasedev->enable_migration) { +- goto add_blocker; ++ if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { ++ error_setg(&err, "%s: Migration is disabled for VFIO device", ++ vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); + } + + ret = vfio_migration_init(vbasedev); + if (ret) { +- goto add_blocker; ++ if (ret == -ENOTTY) { ++ error_setg(&err, "%s: VFIO migration is not supported in kernel", ++ vbasedev->name); ++ } else { ++ error_setg(&err, ++ "%s: Migration couldn't be initialized for VFIO device, " ++ "err: %d 
(%s)", ++ vbasedev->name, ret, strerror(-ret)); ++ } ++ ++ return vfio_block_migration(vbasedev, err, errp); ++ } ++ ++ if (!vbasedev->dirty_pages_supported) { ++ if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { ++ error_setg(&err, ++ "%s: VFIO device doesn't support device dirty tracking", ++ vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); ++ } ++ ++ warn_report("%s: VFIO device doesn't support device dirty tracking", ++ vbasedev->name); + } + +- ret = vfio_block_multiple_devices_migration(errp); ++ ret = vfio_block_multiple_devices_migration(vbasedev, errp); + if (ret) { + return ret; + } + +- ret = vfio_block_giommu_migration(errp); ++ ret = vfio_block_giommu_migration(vbasedev, errp); + if (ret) { + return ret; + } + +- trace_vfio_migration_probe(vbasedev->name); ++ trace_vfio_migration_realize(vbasedev->name); + return 0; +- +-add_blocker: +- error_setg(&vbasedev->migration_blocker, +- "VFIO device doesn't support migration"); +- +- ret = migrate_add_blocker(vbasedev->migration_blocker, errp); +- if (ret < 0) { +- error_free(vbasedev->migration_blocker); +- vbasedev->migration_blocker = NULL; +- } +- return ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 15e7554954..6634945a70 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3371,8 +3371,8 @@ static Property vfio_pci_dev_properties[] = { + VFIO_FEATURE_ENABLE_REQ_BIT, true), + DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), +- DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, +- vbasedev.enable_migration, false), ++ DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, ++ vbasedev.enable_migration, ON_OFF_AUTO_AUTO), + DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), + DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, + vbasedev.ram_block_discard_allowed, false), +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events 
+index 4150b59e58..0391bd583b 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -155,7 +155,7 @@ vfio_load_cleanup(const char *name) " (%s)" + vfio_load_device_config_state(const char *name) " (%s)" + vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 + vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" +-vfio_migration_probe(const char *name) " (%s)" ++vfio_migration_realize(const char *name) " (%s)" + vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" + vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" + vfio_save_block(const char *name, int data_size) " (%s) data_size %d" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 1d19c6f251..93429b9abb 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -139,7 +139,7 @@ typedef struct VFIODevice { + bool needs_reset; + bool no_mmap; + bool ram_block_discard_allowed; +- bool enable_migration; ++ OnOffAuto enable_migration; + VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; +@@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + extern VFIOGroupList vfio_group_list; + + bool vfio_mig_active(void); +-int vfio_block_multiple_devices_migration(Error **errp); ++int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); + void vfio_unblock_multiple_devices_migration(void); +-int vfio_block_giommu_migration(Error **errp); ++int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); + int64_t vfio_mig_bytes_transferred(void); + void vfio_reset_bytes_transferred(void); + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch b/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch new file mode 100644 index 0000000..3b61c5d --- /dev/null +++ 
b/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch @@ -0,0 +1,102 @@ +From edcf24a08d66d620a10c746824e31d230c8516ce Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 13/37] vfio/migration: Refactor vfio_save_block() to return + saved data size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/28] b4aed6ddcbde159e98275a0675dcdf45d644673b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit cf53efbbda2e +Author: Avihai Horon +Date: Wed Jun 21 14:11:58 2023 +0300 + + vfio/migration: Refactor vfio_save_block() to return saved data size + + Refactor vfio_save_block() to return the size of saved data on success + and -errno on error. + + This will be used in next patch to implement VFIO migration pre-copy + support. 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Reviewed-by: Juan Quintela + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 6b58dddb88..235978fd68 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -241,8 +241,8 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, + return 0; + } + +-/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */ +-static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) ++/* Returns the size of saved data on success and -errno on error */ ++static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + { + ssize_t data_size; + +@@ -252,7 +252,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) + return -errno; + } + if (data_size == 0) { +- return 1; ++ return 0; + } + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); +@@ -262,7 +262,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) + + trace_vfio_save_block(migration->vbasedev->name, data_size); + +- return qemu_file_get_error(f); ++ return qemu_file_get_error(f) ?: data_size; + } + + /* ---------------------------------------------------------------------- */ +@@ -335,6 +335,7 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + { + VFIODevice *vbasedev = opaque; ++ ssize_t data_size; + int ret; + + /* We reach here with device state STOP only */ +@@ -345,11 +346,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + } + + do { +- ret = vfio_save_block(f, vbasedev->migration); +- if (ret < 0) { +- return ret; ++ data_size = vfio_save_block(f, vbasedev->migration); ++ if (data_size < 0) { ++ return data_size; + } +- } 
while (!ret); ++ } while (data_size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + ret = qemu_file_get_error(f); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch b/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch new file mode 100644 index 0000000..ad3c6ca --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch @@ -0,0 +1,56 @@ +From 5bb94c4eaeb94f0b41a57660098a4c12a295b725 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 28/37] vfio/migration: Remove print of "Migration disabled" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [26/28] c7ff1f9c90b4cfcb327ef474042ea71ea577a94d (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0520d63c7701 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:09 2023 +0800 + + vfio/migration: Remove print of "Migration disabled" + + Property enable_migration supports [on/off/auto]. + In ON mode, error pointer is passed to errp and logged. + In OFF mode, we don't need to log "Migration disabled" as it's intentional. + In AUTO mode, we should only ever see errors or warnings if the device + supports migration and an error or incompatibility occurs while further + probing or configuring it. Lack of support for migration shouldn't + generate an error or warning. 
+ + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 037b7d4176..a60b868c38 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3233,7 +3233,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + if (!pdev->failover_pair_id) { + ret = vfio_migration_realize(vbasedev, errp); + if (ret) { +- error_report("%s: Migration disabled", vbasedev->name); + goto out_deregister; + } + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch b/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch new file mode 100644 index 0000000..2666460 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch @@ -0,0 +1,165 @@ +From a63b4010ba4f491c9144afff363bebcf35ecf496 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 20/37] vfio/migration: Reset bytes_transferred properly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [18/28] e9a70faeca4fd5aa7ef36502cf76bf0b62f65057 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 808642a2f640 +Author: Avihai Horon +Date: Wed Jun 28 10:31:11 2023 +0300 + + vfio/migration: Reset bytes_transferred properly + + Currently, VFIO bytes_transferred is not reset properly: + 1. bytes_transferred is not reset after a VM snapshot (so a migration + following a snapshot will report incorrect value). + 2. 
bytes_transferred is a single counter for all VFIO devices, however + upon migration failure it is reset multiple times, by each VFIO + device. + + Fix it by introducing a new function vfio_reset_bytes_transferred() and + calling it during migration and snapshot start. + + Remove existing bytes_transferred reset in VFIO migration state + notifier, which is not needed anymore. + + Fixes: 3710586caa5d ("qapi: Add VFIO devices migration stats in Migration stats") + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c + migration/savevm.c + context changes due to commit aff3f6606d14 ("migration: Rename + ram_counters to mig_stats") + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 6 +++++- + include/hw/vfio/vfio-common.h | 1 + + migration/migration.c | 1 + + migration/migration.h | 1 + + migration/savevm.c | 1 + + migration/target.c | 17 +++++++++++++++-- + 6 files changed, 24 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index acbf0bb7ab..7cf143926c 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -697,7 +697,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) + case MIGRATION_STATUS_CANCELLING: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_FAILED: +- bytes_transferred = 0; + /* + * If setting the device in RUNNING state fails, the device should + * be reset. To do so, use ERROR state as a recover state. 
+@@ -818,6 +817,11 @@ int64_t vfio_mig_bytes_transferred(void) + return bytes_transferred; + } + ++void vfio_reset_bytes_transferred(void) ++{ ++ bytes_transferred = 0; ++} ++ + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { + int ret = -ENOTSUP; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 6d1b8487c3..1d19c6f251 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -229,6 +229,7 @@ int vfio_block_multiple_devices_migration(Error **errp); + void vfio_unblock_multiple_devices_migration(void); + int vfio_block_giommu_migration(Error **errp); + int64_t vfio_mig_bytes_transferred(void); ++void vfio_reset_bytes_transferred(void); + + #ifdef CONFIG_LINUX + int vfio_get_region_info(VFIODevice *vbasedev, int index, +diff --git a/migration/migration.c b/migration/migration.c +index 9bf1caee6c..47ad6c43cb 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1638,6 +1638,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + */ + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); ++ reset_vfio_bytes_transferred(); + + return true; + } +diff --git a/migration/migration.h b/migration/migration.h +index e9679f8029..7ccf460aa2 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -495,6 +495,7 @@ bool migration_rate_limit(void); + void migration_cancel(const Error *error); + + void populate_vfio_info(MigrationInfo *info); ++void reset_vfio_bytes_transferred(void); + void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); + + #endif +diff --git a/migration/savevm.c b/migration/savevm.c +index aff70e6263..83088fc3f8 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1620,6 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + migrate_init(ms); + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, 
sizeof(compression_counters)); ++ reset_vfio_bytes_transferred(); + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +diff --git a/migration/target.c b/migration/target.c +index 00ca007f97..f39c9a8d88 100644 +--- a/migration/target.c ++++ b/migration/target.c +@@ -14,12 +14,25 @@ + #include "hw/vfio/vfio-common.h" + #endif + ++#ifdef CONFIG_VFIO + void populate_vfio_info(MigrationInfo *info) + { +-#ifdef CONFIG_VFIO + if (vfio_mig_active()) { + info->vfio = g_malloc0(sizeof(*info->vfio)); + info->vfio->transferred = vfio_mig_bytes_transferred(); + } +-#endif + } ++ ++void reset_vfio_bytes_transferred(void) ++{ ++ vfio_reset_bytes_transferred(); ++} ++#else ++void populate_vfio_info(MigrationInfo *info) ++{ ++} ++ ++void reset_vfio_bytes_transferred(void) ++{ ++} ++#endif +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch b/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch new file mode 100644 index 0000000..efd42a9 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch @@ -0,0 +1,125 @@ +From 223eef8363c9ba58514b2d4f93e5ff015d111ff2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 29/37] vfio/migration: Return bool type for + vfio_migration_realize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [27/28] d5aea3ea4c53e4573076cbacbbe3134f9f0f9e53 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit d4a2af747d5a +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:10 2023 +0800 + + vfio/migration: Return bool type for vfio_migration_realize() + + Make vfio_migration_realize() adhere to the convention of other realize() + callbacks(like 
qdev_realize) by returning bool instead of int. + + Suggested-by: Cédric Le Goater + Suggested-by: Joao Martins + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 15 ++++++++++----- + hw/vfio/pci.c | 3 +-- + include/hw/vfio/vfio-common.h | 2 +- + 3 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index e3954570c8..2674f4bc47 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -846,7 +846,12 @@ void vfio_reset_bytes_transferred(void) + bytes_transferred = 0; + } + +-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) ++/* ++ * Return true when either migration initialized or blocker registered. ++ * Currently only return false when adding blocker fails which will ++ * de-register vfio device. ++ */ ++bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { + Error *err = NULL; + int ret; +@@ -854,7 +859,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { + error_setg(&err, "%s: Migration is disabled for VFIO device", + vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ return !vfio_block_migration(vbasedev, err, errp); + } + + ret = vfio_migration_init(vbasedev); +@@ -869,7 +874,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + vbasedev->name, ret, strerror(-ret)); + } + +- return vfio_block_migration(vbasedev, err, errp); ++ return !vfio_block_migration(vbasedev, err, errp); + } + + if (!vbasedev->dirty_pages_supported) { +@@ -896,7 +901,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + } + + trace_vfio_migration_realize(vbasedev->name); +- return 0; ++ return true; + + add_blocker: + ret = vfio_block_migration(vbasedev, err, errp); +@@ -904,7 +909,7 @@ out_deinit: + if (ret) { + 
vfio_migration_deinit(vbasedev); + } +- return ret; ++ return !ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index a60b868c38..ba40ca8784 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3231,8 +3231,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + } + + if (!pdev->failover_pair_id) { +- ret = vfio_migration_realize(vbasedev, errp); +- if (ret) { ++ if (!vfio_migration_realize(vbasedev, errp)) { + goto out_deregister; + } + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 45167c8a8a..da43d27352 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -252,7 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container, + int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space); + +-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); ++bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch b/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch new file mode 100644 index 0000000..6211db7 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch @@ -0,0 +1,68 @@ +From 76208f7824d5139ac8d86140b0e01031b67638cc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 04/37] vfio/migration: Skip log_sync during migration SETUP + state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/28] 4c340992b472ac4627b57705f4e971f14bbb0846 (clegoate/qemu-kvm-c9s) + 
+Bugzilla: https://bugzilla.redhat.com/2192818 + +commit ff180c6bd7a8 +Author: Avihai Horon +Date: Mon Apr 3 16:00:00 2023 +0300 + + vfio/migration: Skip log_sync during migration SETUP state + + Currently, VFIO log_sync can be issued while migration is in SETUP + state. However, doing this log_sync is at best redundant and at worst + can fail. + + Redundant -- all RAM is marked dirty in migration SETUP state and is + transferred only after migration is set to ACTIVE state, so doing + log_sync during migration SETUP is pointless. + + Can fail -- there is a time window, between setting migration state to + SETUP and starting dirty tracking by RAM save_live_setup handler, during + which dirty tracking is still not started. Any VFIO log_sync call that + is issued during this time window will fail. For example, this error can + be triggered by migrating a VM when a GUI is active, which constantly + calls log_sync. + + Fix it by skipping VFIO log_sync while migration is in SETUP state. + + Fixes: 758b96b61d5c ("vfio/migrate: Move switch of dirty tracking into vfio_memory_listener") + Signed-off-by: Avihai Horon + Link: https://lore.kernel.org/r/20230403130000.6422-1-avihaih@nvidia.com + Signed-off-by: Alex Williamson + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4d01ea3515..78358ede27 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -478,7 +478,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +- if (!migration_is_setup_or_active(ms->state)) { ++ if (ms->state != MIGRATION_STATUS_ACTIVE && ++ ms->state != MIGRATION_STATUS_DEVICE) { + return false; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch b/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch new file mode 
100644 index 0000000..2db8511 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch @@ -0,0 +1,70 @@ +From 77353cdafd08562dff9c99e9f3984d12224bee52 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 14/37] vfio/migration: Store VFIO migration flags in + VFIOMigration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/28] 31a9c39e6ee6338a35dc08c3e7f5c1a204166249 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 6cd1fe11598a +Author: Avihai Horon +Date: Wed Jun 21 14:11:59 2023 +0300 + + vfio/migration: Store VFIO migration flags in VFIOMigration + + VFIO migration flags are queried once in vfio_migration_init(). Store + them in VFIOMigration so they can be used later to check the device's + migration capabilities without re-querying them. + + This will be used in the next patch to check if the device supports + precopy migration. 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 1 + + include/hw/vfio/vfio-common.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 235978fd68..8d33414379 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -603,6 +603,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) + migration->vbasedev = vbasedev; + migration->device_state = VFIO_DEVICE_STATE_RUNNING; + migration->data_fd = -1; ++ migration->mig_flags = mig_flags; + + vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev); + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index eed244f25f..5f29dab839 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -66,6 +66,7 @@ typedef struct VFIOMigration { + int data_fd; + void *data_buffer; + size_t data_buffer_size; ++ uint64_t mig_flags; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch b/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch new file mode 100644 index 0000000..b5d9d37 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch @@ -0,0 +1,67 @@ +From b5a69101abac153c9c9be7f539d810e3e4af3bdf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 19/37] vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI + retry path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [17/28] 
2067bb58f3a2c1a793e5566cee3c78a8299c9c1c (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit c17408892319 +Author: Shameer Kolothum +Date: Tue Jun 13 15:09:43 2023 +0100 + + vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI retry path + + When vfio_enable_vectors() returns with less than requested nr_vectors + we retry with what kernel reported back. But the retry path doesn't + call vfio_prepare_kvm_msi_virq_batch() and this results in, + + qemu-system-aarch64: vfio: Error: Failed to enable 4 MSI vectors, retry with 1 + qemu-system-aarch64: ../hw/vfio/pci.c:602: vfio_commit_kvm_msi_virq_batch: Assertion `vdev->defer_kvm_irq_routing' failed + + Fixes: dc580d51f7dd ("vfio: defer to commit kvm irq routing when enable msi/msix") + Reviewed-by: Longpeng + Signed-off-by: Shameer Kolothum + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 7c5e2b5996..15e7554954 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -666,6 +666,8 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) + + vfio_disable_interrupts(vdev); + ++ vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); ++retry: + /* + * Setting vector notifiers needs to enable route for each vector. 
+ * Deferring to commit the KVM routes once rather than per vector +@@ -673,8 +675,6 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) + */ + vfio_prepare_kvm_msi_virq_batch(vdev); + +- vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); +-retry: + vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); + + for (i = 0; i < vdev->nr_vectors; i++) { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch b/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch new file mode 100644 index 0000000..0aca4ef --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch @@ -0,0 +1,54 @@ +From 816c20b23546d31316c9ca450db8a6668ac6216c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 25/37] vfio/pci: Disable INTx in vfio_realize error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [23/28] 2fde4bad00c4286e6bbe24947c2bfd6468fc0ff3 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit adee0da0368f +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:06 2023 +0800 + + vfio/pci: Disable INTx in vfio_realize error path + + When vfio realize fails, INTx isn't disabled if it has been enabled. + This may confuse host side with unhandled interrupt report. 
+ + Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 87bd440504..2d059832a4 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3244,6 +3244,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + return; + + out_deregister: ++ if (vdev->interrupt == VFIO_INT_INTx) { ++ vfio_intx_disable(vdev); ++ } + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + if (vdev->irqchip_change_notifier.notify) { + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch b/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch new file mode 100644 index 0000000..d05d114 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch @@ -0,0 +1,67 @@ +From 0b1ab3aacc02e70bfe8440236eb9def426bbe10e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 22/37] vfio/pci: Fix a segfault in vfio_realize +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [20/28] 48b9c1efe295c2672693d9c99f6d11738d2b98d1 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 357bd7932a13 +Author: Zhenzhong Duan +Date: Thu Jun 29 16:40:38 2023 +0800 + + vfio/pci: Fix a segfault in vfio_realize + + The kvm irqchip notifier is only registered if the device supports + INTx, however it's unconditionally removed in vfio realize error + path. 
If the assigned device does not support INTx, this will cause + QEMU to crash when vfio realize fails. Change it to conditionally + remove the notifier only if the notify hook is setup. + + Before fix: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 + Connection closed by foreign host. + + After fix: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 + Error: vfio 0000:81:11.1: xres and yres properties require display=on + (qemu) + + Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 6634945a70..d08e6c1a20 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3245,7 +3245,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + + out_deregister: + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); +- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ if (vdev->irqchip_change_notifier.notify) { ++ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ } + out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch b/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch new file mode 100644 index 0000000..1fa725f --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch @@ -0,0 +1,56 @@ +From 2437a06ff137c4bc856df096e42407c1f50b25b0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 06/37] vfio/pci: Fix a use-after-free issue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support 
+RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/28] eca69a7e0a6fb8c1c70be8b91209a53b040e30ba (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit b83b40b61484 +Author: Zhenzhong Duan +Date: Wed May 17 10:46:51 2023 +0800 + + vfio/pci: Fix a use-after-free issue + + vbasedev->name is freed wrongly which leads to garbage VFIO trace log. + Fix it by allocating a dup of vbasedev->name and then free the dup. + + Fixes: 2dca1b37a760 ("vfio/pci: add support for VF token") + Suggested-by: Alex Williamson + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Matthew Rosato + Acked-by: Alex Williamson + Reviewed-by: Philippe Mathieu-Daudé + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 6cd3a98c39..7c5e2b5996 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3018,7 +3018,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + qemu_uuid_unparse(&vdev->vf_token, uuid); + name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); + } else { +- name = vbasedev->name; ++ name = g_strdup(vbasedev->name); + } + + ret = vfio_get_device(group, name, vbasedev, errp); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch b/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch new file mode 100644 index 0000000..3978b96 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch @@ -0,0 +1,55 @@ +From 9c5016c9b3f9cf66d1b531de829e8b5010962695 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 23/37] vfio/pci: Free leaked timer in vfio_realize error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: 
Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [21/28] dbaae4e484de4613f7f7735be519b7357627326e (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0cc889c8826c +Author: Zhenzhong Duan +Date: Thu Jun 29 16:40:39 2023 +0800 + + vfio/pci: Free leaked timer in vfio_realize error path + + When vfio_realize fails, the mmap_timer used for INTx optimization + isn't freed. As this timer isn't activated yet, the potential impact + is just a piece of leaked memory. + + Fixes: ea486926b07d ("vfio-pci: Update slow path INTx algorithm timer related") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d08e6c1a20..87bd440504 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3248,6 +3248,9 @@ out_deregister: + if (vdev->irqchip_change_notifier.notify) { + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); + } ++ if (vdev->intx.mmap_timer) { ++ timer_free(vdev->intx.mmap_timer); ++ } + out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch b/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch new file mode 100644 index 0000000..d937140 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch @@ -0,0 +1,141 @@ +From db53345dba5682c3ba0bc3fc596b30a98dadb88f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 05/37] vfio/pci: Static Resizable BAR capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: 
vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/28] 42e9f4b517eb919c77c6fdbe771d9d05a91955bd (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit b5048a4cbfa0 +Author: Alex Williamson +Date: Thu May 4 14:42:48 2023 -0600 + + vfio/pci: Static Resizable BAR capability + + The PCI Resizable BAR (ReBAR) capability is currently hidden from the + VM because the protocol for interacting with the capability does not + support a mechanism for the device to reject an advertised supported + BAR size. However, when assigned to a VM, the act of resizing the + BAR requires adjustment of host resources for the device, which + absolutely can fail. Linux does not currently allow us to reserve + resources for the device independent of the current usage. + + The only writable field within the ReBAR capability is the BAR Size + register. The PCIe spec indicates that when written, the device + should immediately begin to operate with the provided BAR size. The + spec however also notes that software must only write values + corresponding to supported sizes as indicated in the capability and + control registers. Writing unsupported sizes produces undefined + results. Therefore, if the hypervisor were to virtualize the + capability and control registers such that the current size is the + only indicated available size, then a write of anything other than + the current size falls into the category of undefined behavior, + where we can essentially expose the modified ReBAR capability as + read-only. + + This may seem pointless, but users have reported that virtualizing + the capability in this way not only allows guest software to expose + related features as available (even if only cosmetic), but in some + scenarios can resolve guest driver issues. Additionally, no + regressions in behavior have been reported for this change. 
+ + A caveat here is that the PCIe spec requires for compatibility that + devices report support for a size in the range of 1MB to 512GB, + therefore if the current BAR size falls outside that range we revert + to hiding the capability. + + Reviewed-by: Cédric Le Goater + Link: https://lore.kernel.org/r/20230505232308.2869912-1-alex.williamson@redhat.com + Signed-off-by: Alex Williamson + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 53 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 579b92a6ed..6cd3a98c39 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2069,6 +2069,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) + return 0; + } + ++static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) ++{ ++ uint32_t ctrl; ++ int i, nbar; ++ ++ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); ++ nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; ++ ++ for (i = 0; i < nbar; i++) { ++ uint32_t cap; ++ int size; ++ ++ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); ++ size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT; ++ ++ /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */ ++ cap = size <= 27 ? 1U << (size + 4) : 0; ++ ++ /* ++ * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one ++ * size in the range 1MB to 512GB. We intend to mask all sizes except ++ * the one currently enabled in the size field, therefore if it's ++ * outside the range, hide the whole capability as this virtualization ++ * trick won't work. If >512GB resizable BARs start to appear, we ++ * might need an opt-in or reservation scheme in the kernel. ++ */ ++ if (!(cap & PCI_REBAR_CAP_SIZES)) { ++ return -EINVAL; ++ } ++ ++ /* Hide all sizes reported in the ctrl reg per above requirement. 
*/ ++ ctrl &= (PCI_REBAR_CTRL_BAR_SIZE | ++ PCI_REBAR_CTRL_NBAR_MASK | ++ PCI_REBAR_CTRL_BAR_IDX); ++ ++ /* ++ * The BAR size field is RW, however we've mangled the capability ++ * register such that we only report a single size, ie. the current ++ * BAR size. A write of an unsupported value is undefined, therefore ++ * the register field is essentially RO. ++ */ ++ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0); ++ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0); ++ } ++ ++ return 0; ++} ++ + static void vfio_add_ext_cap(VFIOPCIDevice *vdev) + { + PCIDevice *pdev = &vdev->pdev; +@@ -2142,9 +2190,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) + case 0: /* kernel masked capability */ + case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ + case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ +- case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */ + trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); + break; ++ case PCI_EXT_CAP_ID_REBAR: ++ if (!vfio_setup_rebar_ecap(vdev, next)) { ++ pcie_add_capability(pdev, cap_id, cap_ver, next, size); ++ } ++ break; + default: + pcie_add_capability(pdev, cap_id, cap_ver, next, size); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch b/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch new file mode 100644 index 0000000..7b40e5e --- /dev/null +++ b/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch @@ -0,0 +1,104 @@ +From 3022cc31bca5a5441e285c971eaf72b7643b9be0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 03/37] vfio/pci: add support for VF token +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/28] 
ff24284ede2806e21f4f6709d8abd4c4029b7d5c (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 2dca1b37a760 +Author: Minwoo Im +Date: Mon Mar 20 16:35:22 2023 +0900 + + vfio/pci: add support for VF token + + VF token was introduced [1] to kernel vfio-pci along with SR-IOV + support [2]. This patch adds support VF token among PF and VF(s). To + passthu PCIe VF to a VM, kernel >= v5.7 needs this. + + It can be configured with UUID like: + + -device vfio-pci,host=DDDD:BB:DD:F,vf-token=,... + + [1] https://lore.kernel.org/linux-pci/158396393244.5601.10297430724964025753.stgit@gimli.home/ + [2] https://lore.kernel.org/linux-pci/158396044753.5601.14804870681174789709.stgit@gimli.home/ + + Cc: Alex Williamson + Signed-off-by: Minwoo Im + Reviewed-by: Klaus Jensen + Link: https://lore.kernel.org/r/20230320073522epcms2p48f682ecdb73e0ae1a4850ad0712fd780@epcms2p4 + Signed-off-by: Alex Williamson + +Conflicts: + - hw/vfio/pci.c + context changes in vfio_realize () due to redhat commit 267071d16b23 + ("vfio: cap number of devices that can be assigned") + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 13 ++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index a779053be3..579b92a6ed 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2859,6 +2859,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + int groupid; + int ret, i = 0; + bool is_mdev; ++ char uuid[UUID_FMT_LEN]; ++ char *name; + + if (device_limit && device_limit != vdev->assigned_device_limit) { + error_setg(errp, "Assigned device limit has been redefined. 
" +@@ -2960,7 +2962,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + goto error; + } + +- ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); ++ if (!qemu_uuid_is_null(&vdev->vf_token)) { ++ qemu_uuid_unparse(&vdev->vf_token, uuid); ++ name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); ++ } else { ++ name = g_strdup(vbasedev->name); ++ } ++ ++ ret = vfio_get_device(group, name, vbasedev, errp); ++ g_free(name); + if (ret) { + vfio_put_group(group); + goto error; +@@ -3292,6 +3302,7 @@ static void vfio_instance_init(Object *obj) + + static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), ++ DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), + DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, + vbasedev.pre_copy_dirty_page_tracking, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 45235d38ba..10530743ad 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -137,6 +137,7 @@ struct VFIOPCIDevice { + VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ + void *igd_opregion; + PCIHostDeviceAddress host; ++ QemuUUID vf_token; + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); +-- +2.39.3 + diff --git a/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch b/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch deleted file mode 100644 index a7cfb2f..0000000 --- a/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,185 +0,0 @@ -From 42818e2bc6fa537fe52f7f0e6b094774a1eb00e1 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:48 +0800 -Subject: [PATCH 07/31] vhost: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa 
-RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/10] d58b439eb093f5dd3b7ca081af0ab75780e42917 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add functions to support configure interrupt. -The configure interrupt process will start in vhost_dev_start -and stop in vhost_dev_stop. - -Also add the functions to support vhost_config_pending and -vhost_config_mask. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-8-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit f9a09ca3ea69d108d828b7c82f1bd61b2df6fc96) -Signed-off-by: Cindy Lu ---- - hw/virtio/vhost.c | 78 ++++++++++++++++++++++++++++++++++++++- - include/hw/virtio/vhost.h | 4 ++ - 2 files changed, 81 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 7fb008bc9e..84dbb39e07 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -1596,7 +1596,68 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, - file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); - r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); - if (r < 0) { -- VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed"); -+ error_report("vhost_set_vring_call failed %d", -r); -+ } -+} -+ -+bool vhost_config_pending(struct vhost_dev *hdev) -+{ -+ assert(hdev->vhost_ops); -+ if ((hdev->started == false) || -+ (hdev->vhost_ops->vhost_set_config_call == NULL)) { -+ return false; -+ } -+ -+ EventNotifier *notifier = -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; -+ return event_notifier_test_and_clear(notifier); -+} -+ -+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask) -+{ -+ int fd; -+ int r; -+ EventNotifier *notifier = -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; -+ EventNotifier *config_notifier = &vdev->config_notifier; -+ 
assert(hdev->vhost_ops); -+ -+ if ((hdev->started == false) || -+ (hdev->vhost_ops->vhost_set_config_call == NULL)) { -+ return; -+ } -+ if (mask) { -+ assert(vdev->use_guest_notifier_mask); -+ fd = event_notifier_get_fd(notifier); -+ } else { -+ fd = event_notifier_get_fd(config_notifier); -+ } -+ r = hdev->vhost_ops->vhost_set_config_call(hdev, fd); -+ if (r < 0) { -+ error_report("vhost_set_config_call failed %d", -r); -+ } -+} -+ -+static void vhost_stop_config_intr(struct vhost_dev *dev) -+{ -+ int fd = -1; -+ assert(dev->vhost_ops); -+ if (dev->vhost_ops->vhost_set_config_call) { -+ dev->vhost_ops->vhost_set_config_call(dev, fd); -+ } -+} -+ -+static void vhost_start_config_intr(struct vhost_dev *dev) -+{ -+ int r; -+ -+ assert(dev->vhost_ops); -+ int fd = event_notifier_get_fd(&dev->vdev->config_notifier); -+ if (dev->vhost_ops->vhost_set_config_call) { -+ r = dev->vhost_ops->vhost_set_config_call(dev, fd); -+ if (!r) { -+ event_notifier_set(&dev->vdev->config_notifier); -+ } - } - } - -@@ -1836,6 +1897,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - } - } - -+ r = event_notifier_init( -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); -+ if (r < 0) { -+ return r; -+ } -+ event_notifier_test_and_clear( -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); -+ if (!vdev->use_guest_notifier_mask) { -+ vhost_config_mask(hdev, vdev, true); -+ } - if (hdev->log_enabled) { - uint64_t log_base; - -@@ -1874,6 +1945,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - vhost_device_iotlb_miss(hdev, vq->used_phys, true); - } - } -+ vhost_start_config_intr(hdev); - return 0; - fail_start: - if (vrings) { -@@ -1903,6 +1975,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - - /* should only be called after backend is connected */ - assert(hdev->vhost_ops); -+ event_notifier_test_and_clear( -+ 
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); -+ event_notifier_test_and_clear(&vdev->config_notifier); - - trace_vhost_dev_stop(hdev, vdev->name, vrings); - -@@ -1925,6 +2000,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - } - memory_listener_unregister(&hdev->iommu_listener); - } -+ vhost_stop_config_intr(hdev); - vhost_log_put(hdev, true); - hdev->started = false; - vdev->vhost_started = false; -diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h -index 67a6807fac..05bedb2416 100644 ---- a/include/hw/virtio/vhost.h -+++ b/include/hw/virtio/vhost.h -@@ -33,6 +33,7 @@ struct vhost_virtqueue { - unsigned used_size; - EventNotifier masked_notifier; - EventNotifier error_notifier; -+ EventNotifier masked_config_notifier; - struct vhost_dev *dev; - }; - -@@ -41,6 +42,7 @@ typedef unsigned long vhost_log_chunk_t; - #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) - #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) - #define VHOST_INVALID_FEATURE_BIT (0xff) -+#define VHOST_QUEUE_NUM_CONFIG_INR 0 - - struct vhost_log { - unsigned long long size; -@@ -168,6 +170,8 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); - * Disable direct notifications to vhost device. 
- */ - void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); -+bool vhost_config_pending(struct vhost_dev *hdev); -+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask); - - /** - * vhost_dev_is_started() - report status of vhost device --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch b/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch deleted file mode 100644 index 940133b..0000000 --- a/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch +++ /dev/null @@ -1,171 +0,0 @@ -From bffccbd59a2e2c641810cd7362c7b5ecf5989ed8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:35 +0100 -Subject: [PATCH 03/14] vhost: allocate SVQ device file descriptors at device - start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/13] bab2d43f0fc0d13a4917e706244b37e1a431b082 (eperezmartin/qemu-kvm) - -The next patches will start control SVQ if possible. However, we don't -know if that will be possible at qemu boot anymore. - -Delay device file descriptors until we know it at device start. This -will avoid to create them if the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 3cfb4d069cd2977b707fb519c455d7d416e1f4b0) ---- - hw/virtio/vhost-shadow-virtqueue.c | 31 ++------------------------ - hw/virtio/vhost-vdpa.c | 35 ++++++++++++++++++++++++------ - 2 files changed, 30 insertions(+), 36 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 264ddc166d..3b05bab44d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -715,43 +715,18 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - * @iova_tree: Tree to perform descriptors translations - * @ops: SVQ owner callbacks - * @ops_opaque: ops opaque pointer -- * -- * Returns the new virtqueue or NULL. -- * -- * In case of error, reason is reported through error_report. - */ - VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, - const VhostShadowVirtqueueOps *ops, - void *ops_opaque) - { -- g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); -- int r; -- -- r = event_notifier_init(&svq->hdev_kick, 0); -- if (r != 0) { -- error_report("Couldn't create kick event notifier: %s (%d)", -- g_strerror(errno), errno); -- goto err_init_hdev_kick; -- } -- -- r = event_notifier_init(&svq->hdev_call, 0); -- if (r != 0) { -- error_report("Couldn't create call event notifier: %s (%d)", -- g_strerror(errno), errno); -- goto err_init_hdev_call; -- } -+ VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); - svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; -- return g_steal_pointer(&svq); -- --err_init_hdev_call: -- event_notifier_cleanup(&svq->hdev_kick); -- --err_init_hdev_kick: -- return NULL; -+ return svq; - } - - /** -@@ -763,7 +738,5 @@ void vhost_svq_free(gpointer pvq) - { - VhostShadowVirtqueue *vq = pvq; - vhost_svq_stop(vq); -- event_notifier_cleanup(&vq->hdev_kick); -- event_notifier_cleanup(&vq->hdev_call); - g_free(vq); - } -diff --git 
a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 44e6a9b7b3..530d2ca362 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -428,15 +428,11 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - - shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); - for (unsigned n = 0; n < hdev->nvqs; ++n) { -- g_autoptr(VhostShadowVirtqueue) svq; -+ VhostShadowVirtqueue *svq; - - svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, - v->shadow_vq_ops_opaque); -- if (unlikely(!svq)) { -- error_setg(errp, "Cannot create svq %u", n); -- return -1; -- } -- g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq)); -+ g_ptr_array_add(shadow_vqs, svq); - } - - v->shadow_vqs = g_steal_pointer(&shadow_vqs); -@@ -871,11 +867,23 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - const EventNotifier *event_notifier = &svq->hdev_kick; - int r; - -+ r = event_notifier_init(&svq->hdev_kick, 0); -+ if (r != 0) { -+ error_setg_errno(errp, -r, "Couldn't create kick event notifier"); -+ goto err_init_hdev_kick; -+ } -+ -+ r = event_notifier_init(&svq->hdev_call, 0); -+ if (r != 0) { -+ error_setg_errno(errp, -r, "Couldn't create call event notifier"); -+ goto err_init_hdev_call; -+ } -+ - file.fd = event_notifier_get_fd(event_notifier); - r = vhost_vdpa_set_vring_dev_kick(dev, &file); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Can't set device kick fd"); -- return r; -+ goto err_init_set_dev_fd; - } - - event_notifier = &svq->hdev_call; -@@ -883,8 +891,18 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - r = vhost_vdpa_set_vring_dev_call(dev, &file); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Can't set device call fd"); -+ goto err_init_set_dev_fd; - } - -+ return 0; -+ -+err_init_set_dev_fd: -+ event_notifier_set_handler(&svq->hdev_call, NULL); -+ -+err_init_hdev_call: -+ event_notifier_cleanup(&svq->hdev_kick); -+ -+err_init_hdev_kick: - return r; - } - -@@ -1096,6 +1114,9 @@ static void 
vhost_vdpa_svqs_stop(struct vhost_dev *dev) - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - vhost_vdpa_svq_unmap_rings(dev, svq); -+ -+ event_notifier_cleanup(&svq->hdev_kick); -+ event_notifier_cleanup(&svq->hdev_call); - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch b/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch new file mode 100644 index 0000000..3282c24 --- /dev/null +++ b/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch @@ -0,0 +1,138 @@ +From ac54f5f746782da89ab674733af5622e524b58eb Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 2 Jun 2023 18:27:35 +0200 +Subject: [PATCH 4/6] vhost: fix vhost_dev_enable_notifiers() error case +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 176: vhost: fix vhost_dev_enable_notifiers() error case +RH-Jira: RHEL-330 +RH-Acked-by: MST +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang +RH-Commit: [1/1] fd30d7501be59f7e5b9d6fc5ed84efcc4037d08e (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-330 + +in vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true) +fails, we call vhost_dev_disable_notifiers() that executes +virtio_bus_set_host_notifier(false) on all queues, even on queues that +have failed to be initialized. + +This triggers a core dump in memory_region_del_eventfd(): + + virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24) + vhost VQ 1 notifier binding failed: 24 + .../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. + +Fix the problem by providing to vhost_dev_disable_notifiers() the +number of queues to disable. 
+ +Fixes: 8771589b6f81 ("vhost: simplify vhost_dev_enable_notifiers") +Cc: longpeng2@huawei.com +Signed-off-by: Laurent Vivier +Message-Id: <20230602162735.3670785-1-lvivier@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit 92099aa4e9a3bb6856c290afaf41c76f9e3dd9fd) +--- + hw/virtio/vhost.c | 65 ++++++++++++++++++++++++++--------------------- + 1 file changed, 36 insertions(+), 29 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index a266396576..ae0a033e60 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -1545,6 +1545,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) + memset(hdev, 0, sizeof(struct vhost_dev)); + } + ++static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, ++ VirtIODevice *vdev, ++ unsigned int nvqs) ++{ ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ int i, r; ++ ++ /* ++ * Batch all the host notifiers in a single transaction to avoid ++ * quadratic time complexity in address_space_update_ioeventfds(). ++ */ ++ memory_region_transaction_begin(); ++ ++ for (i = 0; i < nvqs; ++i) { ++ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, ++ false); ++ if (r < 0) { ++ error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); ++ } ++ assert(r >= 0); ++ } ++ ++ /* ++ * The transaction expects the ioeventfds to be open when it ++ * commits. Do it now, before the cleanup loop. ++ */ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; ++i) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); ++ } ++ virtio_device_release_ioeventfd(vdev); ++} ++ + /* Stop processing guest IO notifications in qemu. + * Start processing them in vhost in kernel. 
+ */ +@@ -1574,7 +1608,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + if (r < 0) { + error_report("vhost VQ %d notifier binding failed: %d", i, -r); + memory_region_transaction_commit(); +- vhost_dev_disable_notifiers(hdev, vdev); ++ vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); + return r; + } + } +@@ -1591,34 +1625,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + */ + void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + { +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +- int i, r; +- +- /* +- * Batch all the host notifiers in a single transaction to avoid +- * quadratic time complexity in address_space_update_ioeventfds(). +- */ +- memory_region_transaction_begin(); +- +- for (i = 0; i < hdev->nvqs; ++i) { +- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, +- false); +- if (r < 0) { +- error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); +- } +- assert (r >= 0); +- } +- +- /* +- * The transaction expects the ioeventfds to be open when it +- * commits. Do it now, before the cleanup loop. +- */ +- memory_region_transaction_commit(); +- +- for (i = 0; i < hdev->nvqs; ++i) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); +- } +- virtio_device_release_ioeventfd(vdev); ++ vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); + } + + /* Test and clear event pending status. 
+-- +2.39.3 + diff --git a/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch b/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch deleted file mode 100644 index ca93785..0000000 --- a/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 55aad90e347599e88747888ddbefcba33427f386 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Fri, 16 Dec 2022 11:35:52 +0800 -Subject: [PATCH 12/31] vhost: fix vq dirty bitmap syncing when vIOMMU is - enabled - -RH-Author: Eric Auger -RH-MergeRequest: 134: vhost: fix vq dirty bitmap syncing when vIOMMU is enabled -RH-Bugzilla: 2124856 -RH-Acked-by: Peter Xu -RH-Acked-by: Jason Wang -RH-Acked-by: Laurent Vivier -RH-Commit: [1/1] 57ef499b63dc2cca6e64ee84d1dc127635868ca2 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124856 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=49989924 -Upstream: yes - -When vIOMMU is enabled, the vq->used_phys is actually the IOVA not -GPA. So we need to translate it to GPA before the syncing otherwise we -may hit the following crash since IOVA could be out of the scope of -the GPA log size. This could be noted when using virtio-IOMMU with -vhost using 1G memory. - -Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support") -Cc: qemu-stable@nongnu.org -Tested-by: Lei Yang -Reported-by: Yalan Zhang -Signed-off-by: Jason Wang -Message-Id: <20221216033552.77087-1-jasowang@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 345cc1cbcbce2bab00abc2b88338d7d89c702d6b) -Signed-off-by: Eric Auger ---- - hw/virtio/vhost.c | 84 ++++++++++++++++++++++++++++++++++++----------- - 1 file changed, 64 insertions(+), 20 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 84dbb39e07..2c566dc539 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -20,6 +20,7 @@ - #include "qemu/range.h" - #include "qemu/error-report.h" - #include "qemu/memfd.h" -+#include "qemu/log.h" - #include "standard-headers/linux/vhost_types.h" - #include "hw/virtio/virtio-bus.h" - #include "hw/virtio/virtio-access.h" -@@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev, - } - } - -+static bool vhost_dev_has_iommu(struct vhost_dev *dev) -+{ -+ VirtIODevice *vdev = dev->vdev; -+ -+ /* -+ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support -+ * incremental memory mapping API via IOTLB API. For platform that -+ * does not have IOMMU, there's no need to enable this feature -+ * which may cause unnecessary IOTLB miss/update transactions. 
-+ */ -+ if (vdev) { -+ return virtio_bus_device_iommu_enabled(vdev) && -+ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); -+ } else { -+ return false; -+ } -+} -+ - static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, - MemoryRegionSection *section, - hwaddr first, -@@ -137,8 +156,51 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, - continue; - } - -- vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys, -- range_get_last(vq->used_phys, vq->used_size)); -+ if (vhost_dev_has_iommu(dev)) { -+ IOMMUTLBEntry iotlb; -+ hwaddr used_phys = vq->used_phys, used_size = vq->used_size; -+ hwaddr phys, s, offset; -+ -+ while (used_size) { -+ rcu_read_lock(); -+ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, -+ used_phys, -+ true, -+ MEMTXATTRS_UNSPECIFIED); -+ rcu_read_unlock(); -+ -+ if (!iotlb.target_as) { -+ qemu_log_mask(LOG_GUEST_ERROR, "translation " -+ "failure for used_iova %"PRIx64"\n", -+ used_phys); -+ return -EINVAL; -+ } -+ -+ offset = used_phys & iotlb.addr_mask; -+ phys = iotlb.translated_addr + offset; -+ -+ /* -+ * Distance from start of used ring until last byte of -+ * IOMMU page. -+ */ -+ s = iotlb.addr_mask - offset; -+ /* -+ * Size of used ring, or of the part of it until end -+ * of IOMMU page. To avoid zero result, do the adding -+ * outside of MIN(). 
-+ */ -+ s = MIN(s, used_size - 1) + 1; -+ -+ vhost_dev_sync_region(dev, section, start_addr, end_addr, phys, -+ range_get_last(phys, s)); -+ used_size -= s; -+ used_phys += s; -+ } -+ } else { -+ vhost_dev_sync_region(dev, section, start_addr, -+ end_addr, vq->used_phys, -+ range_get_last(vq->used_phys, vq->used_size)); -+ } - } - return 0; - } -@@ -306,24 +368,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) - dev->log_size = size; - } - --static bool vhost_dev_has_iommu(struct vhost_dev *dev) --{ -- VirtIODevice *vdev = dev->vdev; -- -- /* -- * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support -- * incremental memory mapping API via IOTLB API. For platform that -- * does not have IOMMU, there's no need to enable this feature -- * which may cause unnecessary IOTLB miss/update transactions. -- */ -- if (vdev) { -- return virtio_bus_device_iommu_enabled(vdev) && -- virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); -- } else { -- return false; -- } --} -- - static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, - hwaddr *plen, bool is_write) - { --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch b/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch deleted file mode 100644 index 1b48f5d..0000000 --- a/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch +++ /dev/null @@ -1,56 +0,0 @@ -From d135303da1187d9f214e520a977fe7c47e5ce1f0 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:45 +0800 -Subject: [PATCH 04/31] vhost: introduce new VhostOps vhost_set_config_call -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/10] 
c2492838d9c1415e42d2507f2956d640a30325f2 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -This patch introduces new VhostOps vhost_set_config_call. -This function allows the qemu to set the config -event fd to kernel driver. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-5-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9b30cdf9bbf9524a4f4f8a6eb551eb13cbbd3893) -Signed-off-by: Cindy Lu ---- - include/hw/virtio/vhost-backend.h | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h -index eab46d7f0b..c5ab49051e 100644 ---- a/include/hw/virtio/vhost-backend.h -+++ b/include/hw/virtio/vhost-backend.h -@@ -128,6 +128,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); - - typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); - -+typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev, -+ int fd); - typedef struct VhostOps { - VhostBackendType backend_type; - vhost_backend_init vhost_backend_init; -@@ -174,6 +176,7 @@ typedef struct VhostOps { - vhost_vq_get_addr_op vhost_vq_get_addr; - vhost_get_device_id_op vhost_get_device_id; - vhost_force_iommu_op vhost_force_iommu; -+ vhost_set_config_call_op vhost_set_config_call; - } VhostOps; - - int vhost_backend_update_device_iotlb(struct vhost_dev *dev, --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch b/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch deleted file mode 100644 index de005ba..0000000 --- a/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 6584478deca49d0ea20add588e4fdb51cdc26f1d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:36 +0100 -Subject: [PATCH 04/14] vhost: move iova_tree set to vhost_svq_start -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/13] 200d8e9b58e258a6e301430debc73ef7d962b732 (eperezmartin/qemu-kvm) - -Since we don't know if we will use SVQ at qemu initialization, let's -allocate iova_tree only if needed. To do so, accept it at SVQ start, not -at initialization. - -This will avoid to create it if the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 5fde952bbdd521c10fc018ee04f922a7dca5f663) ---- - hw/virtio/vhost-shadow-virtqueue.c | 9 ++++----- - hw/virtio/vhost-shadow-virtqueue.h | 5 ++--- - hw/virtio/vhost-vdpa.c | 5 ++--- - 3 files changed, 8 insertions(+), 11 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3b05bab44d..4307296358 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -642,9 +642,10 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) - * @svq: Shadow Virtqueue - * @vdev: VirtIO device - * @vq: Virtqueue to shadow -+ * @iova_tree: Tree to perform descriptors translations - */ - void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, -- VirtQueue *vq) -+ VirtQueue *vq, VhostIOVATree *iova_tree) - { - size_t desc_size, driver_size, device_size; - -@@ -655,6 +656,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - svq->last_used_idx = 0; - svq->vdev = vdev; - svq->vq = vq; -+ svq->iova_tree = iova_tree; - - svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); - driver_size = vhost_svq_driver_area_size(svq); -@@ -712,18 +714,15 @@ void 
vhost_svq_stop(VhostShadowVirtqueue *svq) - * Creates vhost shadow virtqueue, and instructs the vhost device to use the - * shadow methods and file descriptors. - * -- * @iova_tree: Tree to perform descriptors translations - * @ops: SVQ owner callbacks - * @ops_opaque: ops opaque pointer - */ --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -- const VhostShadowVirtqueueOps *ops, -+VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, - void *ops_opaque) - { - VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); -- svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; - return svq; -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index d04c34a589..926a4897b1 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -126,11 +126,10 @@ size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq); - size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq); - - void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, -- VirtQueue *vq); -+ VirtQueue *vq, VhostIOVATree *iova_tree); - void vhost_svq_stop(VhostShadowVirtqueue *svq); - --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -- const VhostShadowVirtqueueOps *ops, -+VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, - void *ops_opaque); - - void vhost_svq_free(gpointer vq); -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 530d2ca362..e65603022f 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -430,8 +430,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - for (unsigned n = 0; n < hdev->nvqs; ++n) { - VhostShadowVirtqueue *svq; - -- svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, -- v->shadow_vq_ops_opaque); -+ svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque); - 
g_ptr_array_add(shadow_vqs, svq); - } - -@@ -1070,7 +1069,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - goto err; - } - -- vhost_svq_start(svq, dev->vdev, vq); -+ vhost_svq_start(svq, dev->vdev, vq, v->iova_tree); - ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); - if (unlikely(!ok)) { - goto err_map; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch b/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch deleted file mode 100644 index 099dd73..0000000 --- a/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 2906f8df3c5e915a3dc05a705b87990211f114b5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:34 +0100 -Subject: [PATCH 02/14] vhost: set SVQ device call handler at SVQ start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/13] ad90a6cc5c71b70d705904433d5a986e8fedb924 (eperezmartin/qemu-kvm) - -By the end of this series CVQ is shadowed as long as the features -support it. - -Since we don't know at the beginning of qemu running if this is -supported, move the event notifier handler setting to the start of the -SVQ, instead of the start of qemu run. This will avoid to create them if -the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 20e7412bfd63c68f1798fbdb799aedb7e05fee88) ---- - hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 5bd14cad96..264ddc166d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -648,6 +648,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - { - size_t desc_size, driver_size, device_size; - -+ event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->next_guest_avail_elem = NULL; - svq->shadow_avail_idx = 0; - svq->shadow_used_idx = 0; -@@ -704,6 +705,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - g_free(svq->desc_state); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); -+ event_notifier_set_handler(&svq->hdev_call, NULL); - } - - /** -@@ -740,7 +742,6 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, - } - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); -- event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; -@@ -763,7 +764,6 @@ void vhost_svq_free(gpointer pvq) - VhostShadowVirtqueue *vq = pvq; - vhost_svq_stop(vq); - event_notifier_cleanup(&vq->hdev_kick); -- event_notifier_set_handler(&vq->hdev_call, NULL); - event_notifier_cleanup(&vq->hdev_call); - g_free(vq); - } --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch b/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch deleted file mode 100644 index 88d4df6..0000000 --- a/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch +++ /dev/null @@ -1,73 +0,0 @@ -From e01563a8de9a45937ffd8d4c1d74a6890ffb6eb6 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:46 +0800 -Subject: [PATCH 05/31] vhost-vdpa: add support for config interrupt -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/10] 49bfd214a503f8e199ff93f4bbfcbd4c4f2405b5 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add new call back function in vhost-vdpa, The function -vhost_set_config_call can set the event fd to kernel. -This function will be called in the vhost_dev_start -and vhost_dev_stop - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-6-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 259f3acc1c675dd77ebbdb28a483f5d0220bdbf6) -Signed-off-by: Cindy Lu ---- - hw/virtio/trace-events | 1 + - hw/virtio/vhost-vdpa.c | 8 ++++++++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 14fc5b9bb2..46f2faf04e 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -62,6 +62,7 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI - vhost_vdpa_set_owner(void *dev) "dev: %p" - vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 - vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64 -+vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d" - - # virtio.c - virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 7468e44b87..c5be2645b0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -754,6 +754,13 
@@ static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) - return 0; - } - -+static int vhost_vdpa_set_config_call(struct vhost_dev *dev, -+ int fd) -+{ -+ trace_vhost_vdpa_set_config_call(dev, fd); -+ return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd); -+} -+ - static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, - uint32_t config_len) - { -@@ -1310,4 +1317,5 @@ const VhostOps vdpa_ops = { - .vhost_get_device_id = vhost_vdpa_get_device_id, - .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, - .vhost_force_iommu = vhost_vdpa_force_iommu, -+ .vhost_set_config_call = vhost_vdpa_set_config_call, - }; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch index 0c82680..fd29eb7 100644 --- a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +++ b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch @@ -1,14 +1,15 @@ -From cca66d3e5f7bc1d88d79a7653ae244ba31566ee8 Mon Sep 17 00:00:00 2001 +From 4e30ca551fb3740a428017a0debf0a6aab976639 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 19 Jun 2023 12:22:09 +0530 -Subject: [PATCH 2/2] vhost-vdpa: do not cleanup the vdpa/vhost-net structures +Subject: [PATCH 6/6] vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present RH-Author: Ani Sinha -RH-MergeRequest: 294: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present -RH-Bugzilla: 2227721 +RH-MergeRequest: 174: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present +RH-Bugzilla: 2128929 +RH-Acked-by: Igor Mammedov RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] af8fa659afb3d8a2e38bb745b31d8cd665a1fc77 +RH-Commit: [1/1] c70d4e5fd93256326d318e0b507db6b9eb93ad86 (anisinha/centos-qemu-kvm) When a peer nic is still attached to the vdpa backend, it is too early to free up the vhost-net and vdpa structures. 
If these structures are freed here, then @@ -38,20 +39,17 @@ Message-Id: <20230619065209.442185-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin (cherry picked from commit a0d7215e339b61c7d7a7b3fcf754954d80d93eb8) -Signed-off-by: Michael Tokarev -(Mjt: context change for stable-7.2) -(cherry picked from commit 3d12598b74ed4bcc6db8b50818a95c4b770d4487) --- net/vhost-vdpa.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 7d9c4ea09d..1b4fec59a2 100644 +index 99904a0da7..8c8900f0f4 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c -@@ -180,6 +180,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) +@@ -184,6 +184,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_dev *dev = &s->vhost_net->dev; + /* + * If a peer NIC is attached, do not cleanup anything. @@ -63,7 +61,7 @@ index 7d9c4ea09d..1b4fec59a2 100644 + } qemu_vfree(s->cvq_cmd_out_buffer); qemu_vfree(s->status); - if (dev->vq_index + dev->nvqs == dev->vq_index_end) { + if (s->vhost_net) { -- 2.39.3 diff --git a/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch b/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch new file mode 100644 index 0000000..3711949 --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch @@ -0,0 +1,86 @@ +From 3b51a7b84ea21360c6d551284aecb8b6f371e888 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 4 Jul 2023 09:19:31 +0200 +Subject: [PATCH 9/9] vhost-vdpa: mute unaligned memory error report +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 193: vhost-vdpa: mute unaligned memory error report +RH-Bugzilla: 2141965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eugenio Pérez +RH-Commit: [1/1] 60f5385d41269ce9310e1e8e0a2f1106e3a16ada (lvivier/qemu-kvm-centos) + +BZ: 
https://bugzilla.redhat.com/show_bug.cgi?id=2141965 + +With TPM CRM device, vhost-vdpa reports an error when it tries +to register a listener for a non aligned memory region: + + qemu-system-x86_64: vhost_vdpa_listener_region_add received unaligned region + qemu-system-x86_64: vhost_vdpa_listener_region_del received unaligned region + +This error can be confusing for the user whereas we only need to skip +the region (as it's already done after the error_report()) + +Rather than introducing a special case for TPM CRB memory section +to not display the message in this case, simply replace the +error_report() by a trace function (with more information, like the +memory region name). + +Signed-off-by: Laurent Vivier +Message-Id: <20230704071931.575888-2-lvivier@redhat.com> +Reviewed-by: David Hildenbrand +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 77812aa7b1fdf8f547c35a7f9a4eb1cbf3a073db) +--- + hw/virtio/trace-events | 2 ++ + hw/virtio/vhost-vdpa.c | 8 ++++++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 68b752e304..300dec8d3e 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -34,7 +34,9 @@ vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_ + vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 + vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 + vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 ++vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s 
offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 + vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" ++vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 + vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 + vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 + vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index bc6bad23d5..c04f14420d 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -202,7 +202,9 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { +- error_report("%s received unaligned region", __func__); ++ trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, ++ section->offset_within_address_space & ~TARGET_PAGE_MASK, ++ section->offset_within_region & ~TARGET_PAGE_MASK); + return; + } + +@@ -281,7 +283,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { +- error_report("%s received unaligned region", __func__); ++ trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, ++ section->offset_within_address_space & ~TARGET_PAGE_MASK, ++ section->offset_within_region & ~TARGET_PAGE_MASK); + return; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch deleted 
file mode 100644 index 02f4666..0000000 --- a/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,115 +0,0 @@ -From e04c76339580effae41617b690b58a6605e0f40b Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:47 +0800 -Subject: [PATCH 06/31] virtio: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/10] 7048eb488b732578686d451684babaf17b582b05 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add the functions to support the configure interrupt in virtio -The function virtio_config_guest_notifier_read will notify the -guest if there is an configure interrupt. -The function virtio_config_set_guest_notifier_fd_handler is -to set the fd hander for the notifier - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-7-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 7d847d0c9b93b91160f40d69a65c904d76f1edd8) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio.c | 29 +++++++++++++++++++++++++++++ - include/hw/virtio/virtio.h | 4 ++++ - 2 files changed, 33 insertions(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index eb6347ab5d..34e9c5d141 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -4012,7 +4012,14 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n) - virtio_irq(vq); - } - } -+static void virtio_config_guest_notifier_read(EventNotifier *n) -+{ -+ VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier); - -+ if (event_notifier_test_and_clear(n)) { -+ virtio_notify_config(vdev); -+ } -+} - void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, - bool with_irqfd) - { -@@ -4029,6 +4036,23 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, - } - } - -+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, -+ bool assign, bool with_irqfd) -+{ -+ EventNotifier *n; -+ n = &vdev->config_notifier; -+ if (assign && !with_irqfd) { -+ event_notifier_set_handler(n, virtio_config_guest_notifier_read); -+ } else { -+ event_notifier_set_handler(n, NULL); -+ } -+ if (!assign) { -+ /* Test and clear notifier before closing it,*/ -+ /* in case poll callback didn't have time to run. 
*/ -+ virtio_config_guest_notifier_read(n); -+ } -+} -+ - EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) - { - return &vq->guest_notifier; -@@ -4109,6 +4133,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) - return &vq->host_notifier; - } - -+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev) -+{ -+ return &vdev->config_notifier; -+} -+ - void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) - { - vq->host_notifier_enabled = enabled; -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index 1f4a41b958..9c3a4642f2 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -138,6 +138,7 @@ struct VirtIODevice - AddressSpace *dma_as; - QLIST_HEAD(, VirtQueue) *vector_queues; - QTAILQ_ENTRY(VirtIODevice) next; -+ EventNotifier config_notifier; - }; - - struct VirtioDeviceClass { -@@ -360,6 +361,9 @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); - VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); - VirtQueue *virtio_vector_next_queue(VirtQueue *vq); -+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev); -+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, -+ bool assign, bool with_irqfd); - - static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) - { --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch b/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch deleted file mode 100644 index ea2589a..0000000 --- a/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch +++ /dev/null @@ -1,262 +0,0 @@ -From 34a267758cf016f34b327318500efdbf0f606033 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:42 +0800 -Subject: [PATCH 01/31] virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/10] f374aaae221bc5a4c2521a267d21350b812e11ba (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -To support configure interrupt for vhost-vdpa -Introduce VIRTIO_CONFIG_IRQ_IDX -1 as configure interrupt's queue index, -Then we can reuse the functions guest_notifier_mask and guest_notifier_pending. -Add the check of queue index in these drivers, if the driver does not support -configure interrupt, the function will just return - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-2-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 544f0278afcab2bebab61b14e4c2c58e65911f5b) -Signed-off-by: Cindy Lu ---- - hw/display/vhost-user-gpu.c | 18 ++++++++++++++++++ - hw/net/virtio-net.c | 22 ++++++++++++++++++++-- - hw/virtio/vhost-user-fs.c | 18 ++++++++++++++++++ - hw/virtio/vhost-user-gpio.c | 10 ++++++++++ - hw/virtio/vhost-vsock-common.c | 18 ++++++++++++++++++ - hw/virtio/virtio-crypto.c | 18 ++++++++++++++++++ - include/hw/virtio/virtio.h | 3 +++ - 7 files changed, 105 insertions(+), 2 deletions(-) - -diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c -index 19c0e20103..4380a5e672 100644 ---- a/hw/display/vhost-user-gpu.c -+++ b/hw/display/vhost-user-gpu.c -@@ -486,6 +486,15 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx) - { - VhostUserGPU *g = VHOST_USER_GPU(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } 
- return vhost_virtqueue_pending(&g->vhost->dev, idx); - } - -@@ -494,6 +503,15 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) - { - VhostUserGPU *g = VHOST_USER_GPU(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask); - } - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index aba12759d5..bee35d6f9f 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3316,6 +3316,15 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) - } else { - nc = qemu_get_subqueue(n->nic, vq2q(idx)); - } -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return false -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); - } - -@@ -3339,8 +3348,17 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, - } else { - nc = qemu_get_subqueue(n->nic, vq2q(idx)); - } -- vhost_net_virtqueue_mask(get_vhost_net(nc->peer), -- vdev, idx, mask); -+ /* -+ *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } -+ -+ vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); - } - - static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index d97b179e6f..f5049735ac 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -159,6 
+159,15 @@ static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx, - { - VHostUserFS *fs = VHOST_USER_FS(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask); - } - -@@ -166,6 +175,15 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx) - { - VHostUserFS *fs = VHOST_USER_FS(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_virtqueue_pending(&fs->vhost_dev, idx); - } - -diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c -index b7b82a1099..fe3da32c74 100644 ---- a/hw/virtio/vhost-user-gpio.c -+++ b/hw/virtio/vhost-user-gpio.c -@@ -191,6 +191,16 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) - { - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } -+ - vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); - } - -diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c -index d21c72b401..d2b5519d5a 100644 ---- a/hw/virtio/vhost-vsock-common.c -+++ b/hw/virtio/vhost-vsock-common.c -@@ -127,6 +127,15 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, - { - VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the 
Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); - } - -@@ -135,6 +144,15 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, - { - VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_virtqueue_pending(&vvc->vhost_dev, idx); - } - -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index 97da74e719..516425e26a 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -1182,6 +1182,15 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, - - assert(vcrypto->vhost_started); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - cryptodev_vhost_virtqueue_mask(vdev, queue, idx, mask); - } - -@@ -1192,6 +1201,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) - - assert(vcrypto->vhost_started); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return cryptodev_vhost_virtqueue_pending(vdev, queue, idx); - } - -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index acfd4df125..1f4a41b958 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -79,6 +79,9 @@ typedef 
struct VirtQueueElement - - #define VIRTIO_NO_VECTOR 0xffff - -+/* special index value used internally for config irqs */ -+#define VIRTIO_CONFIG_IRQ_IDX -1 -+ - #define TYPE_VIRTIO_DEVICE "virtio-device" - OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE) - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch b/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch new file mode 100644 index 0000000..acfb3ae --- /dev/null +++ b/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch @@ -0,0 +1,151 @@ +From 08c8af80dbd03b46a6a8397ef0c41cda3e6de22c Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jul 2023 18:51:17 +0200 +Subject: [PATCH 01/37] virtio-iommu: Fix 64kB host page size VFIO device + assignment + +RH-Author: Eric Auger +RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes +RH-Bugzilla: 2211609 2211634 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Commit: [1/2] b48db1c964559505dda4c6c9a3b79d68207b25eb (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211634 + +When running on a 64kB page size host and protecting a VFIO device +with the virtio-iommu, qemu crashes with this kind of message: + +qemu-kvm: virtio-iommu page mask 0xfffffffffffff000 is incompatible +with mask 0x20010000 +qemu: hardware error: vfio: DMA mapping failed, unable to continue + +This is due to the fact the IOMMU MR corresponding to the VFIO device +is enabled very late on domain attach, after the machine init. +The device reports a minimal 64kB page size but it is too late to be +applied. virtio_iommu_set_page_size_mask() fails and this causes +vfio_listener_region_add() to end up with hw_error(); + +To work around this issue, we transiently enable the IOMMU MR on +machine init to collect the page size requirements and then restore +the bypass state. 
+ +Fixes: 90519b9053 ("virtio-iommu: Add bypass mode support to assigned device") +Signed-off-by: Eric Auger + +Message-Id: <20230705165118.28194-2-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Jean-Philippe Brucker +Tested-by: Jean-Philippe Brucker +Reviewed-by: Zhenzhong Duan +(cherry picked from commit 94df5b2180d61fb2ee2b04cc007981e58b6479a9) +Signed-off-by: Eric Auger +--- + hw/virtio/trace-events | 1 + + hw/virtio/virtio-iommu.c | 31 +++++++++++++++++++++++++++++-- + include/hw/virtio/virtio-iommu.h | 2 ++ + 3 files changed, 32 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 8f8d05cf9b..68b752e304 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -131,6 +131,7 @@ virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "m + virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" + virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" + virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" ++virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 + + # virtio-mem.c + virtio_mem_send_response(uint16_t type) "type=%" PRIu16 +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 1cd258135d..542679b321 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -24,6 +24,7 @@ + #include "hw/virtio/virtio.h" + #include "sysemu/kvm.h" + #include "sysemu/reset.h" ++#include "sysemu/sysemu.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "trace.h" +@@ -1106,12 +1107,12 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + } + + /* +- * After the machine is finalized, we can't change the mask anymore. If by ++ * Once the granule is frozen we can't change the mask anymore. 
If by + * chance the hotplugged device supports the same granule, we can still + * accept it. Having a different masks is possible but the guest will use + * sub-optimal block sizes, so warn about it. + */ +- if (phase_check(PHASE_MACHINE_READY)) { ++ if (s->granule_frozen) { + int new_granule = ctz64(new_mask); + int cur_granule = ctz64(cur_mask); + +@@ -1146,6 +1147,28 @@ static void virtio_iommu_system_reset(void *opaque) + + } + ++static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) ++{ ++ VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); ++ int granule; ++ ++ if (likely(s->config.bypass)) { ++ /* ++ * Transient IOMMU MR enable to collect page_size_mask requirements ++ * through memory_region_iommu_set_page_size_mask() called by ++ * VFIO region_add() callback ++ */ ++ s->config.bypass = false; ++ virtio_iommu_switch_address_space_all(s); ++ /* restore default */ ++ s->config.bypass = true; ++ virtio_iommu_switch_address_space_all(s); ++ } ++ s->granule_frozen = true; ++ granule = ctz64(s->config.page_size_mask); ++ trace_virtio_iommu_freeze_granule(BIT(granule)); ++} ++ + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); +@@ -1189,6 +1212,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); + } + ++ s->machine_done.notify = virtio_iommu_freeze_granule; ++ qemu_add_machine_init_done_notifier(&s->machine_done); ++ + qemu_register_reset(virtio_iommu_system_reset, s); + } + +@@ -1198,6 +1224,7 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) + VirtIOIOMMU *s = VIRTIO_IOMMU(dev); + + qemu_unregister_reset(virtio_iommu_system_reset, s); ++ qemu_remove_machine_init_done_notifier(&s->machine_done); + + g_hash_table_destroy(s->as_by_busptr); + if (s->domains) { +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 
2ad5ee320b..a93fc5383e 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -61,6 +61,8 @@ struct VirtIOIOMMU { + QemuRecMutex mutex; + GTree *endpoints; + bool boot_bypass; ++ Notifier machine_done; ++ bool granule_frozen; + }; + + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch b/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch new file mode 100644 index 0000000..7934a12 --- /dev/null +++ b/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch @@ -0,0 +1,83 @@ +From 643d93343759a350fe0f6327d308bf6a93c79d25 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jul 2023 18:51:18 +0200 +Subject: [PATCH 02/37] virtio-iommu: Rework the traces in + virtio_iommu_set_page_size_mask() + +RH-Author: Eric Auger +RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes +RH-Bugzilla: 2211609 2211634 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Commit: [2/2] 0af7078dde158f07c83e2b293adc5d9d475688ae (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211609 + +The current error messages in virtio_iommu_set_page_size_mask() +sound quite similar for different situations and miss the IOMMU +memory region that causes the issue. + +Clarify them and rework the comment. + +Also remove the trace when the new page_size_mask is not applied as +the current frozen granule is kept. This message is rather confusing +for the end user and anyway the current granule would have been used +by the driver. + +Signed-off-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Message-Id: <20230705165118.28194-3-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Jean-Philippe Brucker +Tested-by: Jean-Philippe Brucker +(cherry picked from commit 587a7641d53055054d68d67d94c9408ef808f127) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 19 +++++++------------ + 1 file changed, 7 insertions(+), 12 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 542679b321..421e2a944f 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -1101,29 +1101,24 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + new_mask); + + if ((cur_mask & new_mask) == 0) { +- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 +- " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask); ++ error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 ++ " incompatible with currently supported mask 0x%"PRIx64, ++ mr->parent_obj.name, new_mask, cur_mask); + return -1; + } + + /* + * Once the granule is frozen we can't change the mask anymore. If by + * chance the hotplugged device supports the same granule, we can still +- * accept it. Having a different masks is possible but the guest will use +- * sub-optimal block sizes, so warn about it. ++ * accept it. 
+ */ + if (s->granule_frozen) { +- int new_granule = ctz64(new_mask); + int cur_granule = ctz64(cur_mask); + +- if (new_granule != cur_granule) { +- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 +- " is incompatible with mask 0x%"PRIx64, cur_mask, +- new_mask); ++ if (!(BIT(cur_granule) & new_mask)) { ++ error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", ++ mr->parent_obj.name, BIT_ULL(cur_granule)); + return -1; +- } else if (new_mask != cur_mask) { +- warn_report("virtio-iommu page mask 0x%"PRIx64 +- " does not match 0x%"PRIx64, cur_mask, new_mask); + } + return 0; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch b/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch new file mode 100644 index 0000000..638ae98 --- /dev/null +++ b/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch @@ -0,0 +1,88 @@ +From 59cd85621b1b14ada843ea0562cc76b6a7c93df4 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 18 Jul 2023 20:21:36 +0200 +Subject: [PATCH 08/14] virtio-iommu: Standardize granule extraction and + formatting + +RH-Author: Eric Auger +RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes +RH-Bugzilla: 2229133 +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu +RH-Commit: [2/3] 48784ef2a19174518f66479dcb532230bffe8bf1 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 + +At several locations we compute the granule from the config +page_size_mask using ctz() and then format it in traces using +BIT(). As the page_size_mask is 64b we should use ctz64 and +BIT_ULL() for formatting. We failed to be consistent. + +Note the page_size_mask is garanteed to be non null. The spec +mandates the device to set at least one bit, so ctz64 cannot +return 64. 
This is garanteed by the fact the device +initializes the page_size_mask to qemu_target_page_mask() +and then the page_size_mask is further constrained by +virtio_iommu_set_page_size_mask() callback which can't +result in a new mask being null. So if Coverity complains +round those ctz64/BIT_ULL with CID 1517772 this is a false +positive + +Signed-off-by: Eric Auger +Fixes: 94df5b2180 ("virtio-iommu: Fix 64kB host page size VFIO device assignment") +Message-Id: <20230718182136.40096-1-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Jean-Philippe Brucker +(cherry picked from commit 1084feddc6a677cdfdde56936bfb97cf32cc4dee) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 17ce630200..17b3dcd158 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -854,17 +854,19 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + VirtIOIOMMUEndpoint *ep; + uint32_t sid, flags; + bool bypass_allowed; ++ int granule; + bool found; + int i; + + interval.low = addr; + interval.high = addr + 1; ++ granule = ctz64(s->config.page_size_mask); + + IOMMUTLBEntry entry = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = addr, +- .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1, ++ .addr_mask = BIT_ULL(granule) - 1, + .perm = IOMMU_NONE, + }; + +@@ -1117,7 +1119,7 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + if (s->granule_frozen) { + int cur_granule = ctz64(cur_mask); + +- if (!(BIT(cur_granule) & new_mask)) { ++ if (!(BIT_ULL(cur_granule) & new_mask)) { + error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", + mr->parent_obj.name, BIT_ULL(cur_granule)); + return -1; +@@ -1163,7 +1165,7 @@ static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) 
+ } + s->granule_frozen = true; + granule = ctz64(s->config.page_size_mask); +- trace_virtio_iommu_freeze_granule(BIT(granule)); ++ trace_virtio_iommu_freeze_granule(BIT_ULL(granule)); + } + + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch deleted file mode 100644 index 275b197..0000000 --- a/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 181705090c9963c2da97811838ace5bb058737c6 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:50 +0800 -Subject: [PATCH 09/31] virtio-mmio: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/10] 742cc2b425ffd7bbd393772526e7481446ee131c (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add configure interrupt support in virtio-mmio bus. -add function to set configure guest notifier. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-10-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit cd336e834620ea78edef049c3567f312974e475b) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-mmio.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c -index d240efef97..103260ec15 100644 ---- a/hw/virtio/virtio-mmio.c -+++ b/hw/virtio/virtio-mmio.c -@@ -670,7 +670,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign, - - return 0; - } -+static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign, -+ bool with_irqfd) -+{ -+ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); -+ EventNotifier *notifier = virtio_config_get_guest_notifier(vdev); -+ int r = 0; - -+ if (assign) { -+ r = event_notifier_init(notifier, 0); -+ if (r < 0) { -+ return r; -+ } -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ } else { -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ event_notifier_cleanup(notifier); -+ } -+ if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) { -+ vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign); -+ } -+ return r; -+} - static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, - bool assign) - { -@@ -692,6 +715,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, - goto assign_error; - } - } -+ r = virtio_mmio_set_config_guest_notifier(d, assign, with_irqfd); -+ if (r < 0) { -+ goto assign_error; -+ } - - return 0; - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch deleted file mode 100644 index 74b956a..0000000 --- a/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 2b8e3409edb8a17d89c3829cfa3d92bdfdd43c53 Mon Sep 17 
00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:49 +0800 -Subject: [PATCH 08/31] virtio-net: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/10] 1b125169bea6c81c508b154fa1bae68af153b312 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add functions to support configure interrupt in virtio_net -Add the functions to support vhost_net_config_pending -and vhost_net_config_mask. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-9-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 8aab0d1dbe90c7b5ac6672a1a09b0578178f5f4c) -Signed-off-by: Cindy Lu ---- - hw/net/vhost_net-stub.c | 9 +++++++++ - hw/net/vhost_net.c | 9 +++++++++ - hw/net/virtio-net.c | 4 ++-- - include/net/vhost_net.h | 2 ++ - 4 files changed, 22 insertions(+), 2 deletions(-) - -diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c -index 9f7daae99c..c36f258201 100644 ---- a/hw/net/vhost_net-stub.c -+++ b/hw/net/vhost_net-stub.c -@@ -82,6 +82,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - { - } - -+bool vhost_net_config_pending(VHostNetState *net) -+{ -+ return false; -+} -+ -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) -+{ -+} -+ - int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) - { - return -1; -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 043058ff43..6a55f5a473 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -478,6 +478,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - vhost_virtqueue_mask(&net->dev, dev, idx, mask); - } - 
-+bool vhost_net_config_pending(VHostNetState *net) -+{ -+ return vhost_config_pending(&net->dev); -+} -+ -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) -+{ -+ vhost_config_mask(&net->dev, dev, mask); -+} - VHostNetState *get_vhost_net(NetClientState *nc) - { - VHostNetState *vhost_net = 0; -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index bee35d6f9f..ec974f7a76 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3323,7 +3323,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) - */ - - if (idx == VIRTIO_CONFIG_IRQ_IDX) { -- return false; -+ return vhost_net_config_pending(get_vhost_net(nc->peer)); - } - return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); - } -@@ -3355,9 +3355,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, - */ - - if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); - return; - } -- - vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); - } - -diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h -index 40b9a40074..dbbd0dc04e 100644 ---- a/include/net/vhost_net.h -+++ b/include/net/vhost_net.h -@@ -39,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, - bool vhost_net_virtqueue_pending(VHostNetState *net, int n); - void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - int idx, bool mask); -+bool vhost_net_config_pending(VHostNetState *net); -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask); - int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr); - VHostNetState *get_vhost_net(NetClientState *nc); - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch b/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch new file mode 100644 index 0000000..119ea84 --- /dev/null +++ 
b/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch @@ -0,0 +1,92 @@ +From 4fe096a6fad61ab721fd29324d48383c7f427ac9 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 5 Jun 2023 16:21:25 +0200 +Subject: [PATCH 7/9] virtio-net: correctly report maximum tx_queue_size value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 191: virtio-net: correctly report maximum tx_queue_size value +RH-Bugzilla: 2040509 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Eugenio Pérez +RH-Commit: [1/1] afb944c6d75fe476ac86fe267b1cca5f272dfbbd (lvivier/qemu-kvm-centos) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040509 + +Maximum value for tx_queue_size depends on the backend type. +1024 for vDPA/vhost-user, 256 for all the others. + +The value is returned by virtio_net_max_tx_queue_size() to set the +parameter: + + n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), + n->net_conf.tx_queue_size); + +But the parameter checking uses VIRTQUEUE_MAX_SIZE (1024). + +So the parameter is silently ignored and ethtool reports a different +value than the one provided by the user. + + ... -netdev tap,... -device virtio-net,tx_queue_size=1024 + + # ethtool -g enp0s2 + Ring parameters for enp0s2: + Pre-set maximums: + RX: 256 + RX Mini: n/a + RX Jumbo: n/a + TX: 256 + Current hardware settings: + RX: 256 + RX Mini: n/a + RX Jumbo: n/a + TX: 256 + + ... -netdev vhost-user,... -device virtio-net,tx_queue_size=2048 + + Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 + +With this patch the correct maximum value is checked and displayed. 
+ +For vDPA/vhost-user: + + Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 + +For all the others: + + Invalid tx_queue_size (= 512), must be a power of 2 between 256 and 256 + +Fixes: 2eef278b9e63 ("virtio-net: fix tx queue size for !vhost-user") +Cc: mst@redhat.com +Cc: qemu-stable@nongnu.org +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit 4271f4038372f174dbafffacca1a748d058a03ba) +--- + hw/net/virtio-net.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 447f669921..ae1e6a5e3d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3628,12 +3628,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + } + + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || +- n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || ++ n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || + !is_power_of_2(n->net_conf.tx_queue_size)) { + error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " + "must be a power of 2 between %d and %d", + n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, +- VIRTQUEUE_MAX_SIZE); ++ virtio_net_max_tx_queue_size(n)); + virtio_cleanup(vdev); + return; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch deleted file mode 100644 index 14070a4..0000000 --- a/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,274 +0,0 @@ -From 61ac1476d3820c97e1cc103af422b17bc94c6ca5 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:51 +0800 -Subject: [PATCH 10/31] virtio-pci: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 
1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/10] ebd6a11d7699660d8ac5a4e44a790f823daea57c (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add process to handle the configure interrupt, The function's -logic is the same with vq interrupt.Add extra process to check -the configure interrupt - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-11-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1680542862edd963e6380dd4121a5e85df55581f) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 118 +++++++++++++++++++++++++++------ - include/hw/virtio/virtio-pci.h | 4 +- - 2 files changed, 102 insertions(+), 20 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index ec816ea367..3f00e91718 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -751,7 +751,8 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, - VirtQueue *vq; - - if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { -- return -1; -+ *n = virtio_config_get_guest_notifier(vdev); -+ *vector = vdev->config_vector; - } else { - if (!virtio_queue_get_num(vdev, queue_no)) { - return -1; -@@ -811,7 +812,7 @@ undo: - } - return ret; - } --static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) - { - int queue_no; - int ret = 0; -@@ -826,6 +827,10 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - return ret; - } - -+static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy) -+{ -+ return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX); -+} - - static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, - int queue_no) -@@ -850,7 +855,7 @@ static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, - 
kvm_virtio_pci_vq_vector_release(proxy, vector); - } - --static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) - { - int queue_no; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -@@ -863,6 +868,11 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - } - } - -+static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) -+{ -+ kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX); -+} -+ - static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, - unsigned int queue_no, - unsigned int vector, -@@ -944,9 +954,19 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - } - vq = virtio_vector_next_queue(vq); - } -- -+ /* unmask config intr */ -+ if (vector == vdev->config_vector) { -+ n = virtio_config_get_guest_notifier(vdev); -+ ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, -+ msg, n); -+ if (ret < 0) { -+ goto undo_config; -+ } -+ } - return 0; -- -+undo_config: -+ n = virtio_config_get_guest_notifier(vdev); -+ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); - undo: - vq = virtio_vector_first_queue(vdev, vector); - while (vq && unmasked >= 0) { -@@ -980,6 +1000,11 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) - } - vq = virtio_vector_next_queue(vq); - } -+ -+ if (vector == vdev->config_vector) { -+ n = virtio_config_get_guest_notifier(vdev); -+ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); -+ } - } - - static void virtio_pci_vector_poll(PCIDevice *dev, -@@ -1011,6 +1036,34 @@ static void virtio_pci_vector_poll(PCIDevice *dev, - msix_set_pending(dev, vector); - } - } -+ /* poll the config intr */ -+ ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, ¬ifier, -+ &vector); -+ if (ret < 0) { -+ return; -+ } -+ if (vector < vector_start || vector >= vector_end || -+ !msix_is_masked(dev, 
vector)) { -+ return; -+ } -+ if (k->guest_notifier_pending) { -+ if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) { -+ msix_set_pending(dev, vector); -+ } -+ } else if (event_notifier_test_and_clear(notifier)) { -+ msix_set_pending(dev, vector); -+ } -+} -+ -+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, -+ int n, bool assign, -+ bool with_irqfd) -+{ -+ if (n == VIRTIO_CONFIG_IRQ_IDX) { -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ } else { -+ virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd); -+ } - } - - static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, -@@ -1019,17 +1072,25 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); -- VirtQueue *vq = virtio_get_queue(vdev, n); -- EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); -+ VirtQueue *vq = NULL; -+ EventNotifier *notifier = NULL; -+ -+ if (n == VIRTIO_CONFIG_IRQ_IDX) { -+ notifier = virtio_config_get_guest_notifier(vdev); -+ } else { -+ vq = virtio_get_queue(vdev, n); -+ notifier = virtio_queue_get_guest_notifier(vq); -+ } - - if (assign) { - int r = event_notifier_init(notifier, 0); - if (r < 0) { - return r; - } -- virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd); -+ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd); - } else { -- virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd); -+ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false, -+ with_irqfd); - event_notifier_cleanup(notifier); - } - -@@ -1072,10 +1133,13 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - proxy->nvqs_with_notifiers = nvqs; - - /* Must unset vector notifier while guest notifier is still assigned */ -- if 
((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) { -+ if ((proxy->vector_irqfd || -+ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && -+ !assign) { - msix_unset_vector_notifiers(&proxy->pci_dev); - if (proxy->vector_irqfd) { -- kvm_virtio_pci_vector_release(proxy, nvqs); -+ kvm_virtio_pci_vector_vq_release(proxy, nvqs); -+ kvm_virtio_pci_vector_config_release(proxy); - g_free(proxy->vector_irqfd); - proxy->vector_irqfd = NULL; - } -@@ -1091,20 +1155,30 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - goto assign_error; - } - } -- -+ r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign, -+ with_irqfd); -+ if (r < 0) { -+ goto config_assign_error; -+ } - /* Must set vector notifier after guest notifier has been assigned */ -- if ((with_irqfd || k->guest_notifier_mask) && assign) { -+ if ((with_irqfd || -+ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && -+ assign) { - if (with_irqfd) { - proxy->vector_irqfd = - g_malloc0(sizeof(*proxy->vector_irqfd) * - msix_nr_vectors_allocated(&proxy->pci_dev)); -- r = kvm_virtio_pci_vector_use(proxy, nvqs); -+ r = kvm_virtio_pci_vector_vq_use(proxy, nvqs); -+ if (r < 0) { -+ goto config_assign_error; -+ } -+ r = kvm_virtio_pci_vector_config_use(proxy); - if (r < 0) { -- goto assign_error; -+ goto config_error; - } - } -- r = msix_set_vector_notifiers(&proxy->pci_dev, -- virtio_pci_vector_unmask, -+ -+ r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, - virtio_pci_vector_mask, - virtio_pci_vector_poll); - if (r < 0) { -@@ -1117,9 +1191,15 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - notifiers_error: - if (with_irqfd) { - assert(assign); -- kvm_virtio_pci_vector_release(proxy, nvqs); -+ kvm_virtio_pci_vector_vq_release(proxy, nvqs); - } -- -+config_error: -+ if (with_irqfd) { -+ kvm_virtio_pci_vector_config_release(proxy); -+ } -+config_assign_error: -+ 
virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign, -+ with_irqfd); - assign_error: - /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ - assert(assign); -diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h -index 938799e8f6..c02e278f46 100644 ---- a/include/hw/virtio/virtio-pci.h -+++ b/include/hw/virtio/virtio-pci.h -@@ -256,5 +256,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); - * @fixed_queues. - */ - unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); -- -+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, -+ int n, bool assign, -+ bool with_irqfd); - #endif --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch b/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch deleted file mode 100644 index a8c32a2..0000000 --- a/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch +++ /dev/null @@ -1,272 +0,0 @@ -From 9a234f849273d3480e4a88042cb1ea06a37a626b Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:43 +0800 -Subject: [PATCH 02/31] virtio-pci: decouple notifier from interrupt process -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/10] a20f4c9ff38b239531d12cbcc7deaa649c86abc3 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -To reuse the notifier process. We add the virtio_pci_get_notifier -to get the notifier and vector. The INPUT for this function is IDX, -The OUTPUT is the notifier and the vector - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-3-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. 
Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 2e07f69d0c828e21515b63dc22884d548540b382) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 88 +++++++++++++++++++++++++++--------------- - 1 file changed, 57 insertions(+), 31 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index a1c9dfa7bb..52c7692fff 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -728,29 +728,41 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, - } - - static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, -- unsigned int queue_no, -+ EventNotifier *n, - unsigned int vector) - { - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; -- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); - } - - static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, -- unsigned int queue_no, -+ EventNotifier *n , - unsigned int vector) - { -- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; - int ret; - - ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); - assert(ret == 0); - } -+static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, -+ EventNotifier **n, unsigned int *vector) -+{ -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ VirtQueue *vq; -+ -+ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { -+ return -1; -+ } else { -+ if (!virtio_queue_get_num(vdev, queue_no)) { -+ return -1; -+ } -+ *vector = virtio_queue_vector(vdev, queue_no); -+ vq = virtio_get_queue(vdev, queue_no); -+ *n = virtio_queue_get_guest_notifier(vq); -+ } -+ return 0; -+} - - static int 
kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - { -@@ -759,12 +771,15 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - unsigned int vector; - int ret, queue_no; -- -+ EventNotifier *n; - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } - if (vector >= msix_nr_vectors_allocated(dev)) { - continue; - } -@@ -776,7 +791,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - * Otherwise, delay until unmasked in the frontend. - */ - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - if (ret < 0) { - kvm_virtio_pci_vq_vector_release(proxy, vector); - goto undo; -@@ -792,7 +807,11 @@ undo: - continue; - } - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - kvm_virtio_pci_vq_vector_release(proxy, vector); - } -@@ -806,12 +825,16 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - unsigned int vector; - int queue_no; - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- -+ EventNotifier *n; -+ int ret ; - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } - if (vector >= msix_nr_vectors_allocated(dev)) { - continue; - } -@@ -819,21 +842,20 @@ static void 
kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - * Otherwise, it was cleaned when masked in the frontend. - */ - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - kvm_virtio_pci_vq_vector_release(proxy, vector); - } - } - --static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, -+static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, - unsigned int queue_no, - unsigned int vector, -- MSIMessage msg) -+ MSIMessage msg, -+ EventNotifier *n) - { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - VirtIOIRQFD *irqfd; - int ret = 0; - -@@ -860,14 +882,15 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, - event_notifier_set(n); - } - } else { -- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - } - return ret; - } - --static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, -+static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy, - unsigned int queue_no, -- unsigned int vector) -+ unsigned int vector, -+ EventNotifier *n) - { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -@@ -878,7 +901,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { - k->guest_notifier_mask(vdev, queue_no, true); - } else { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - } - -@@ -888,6 +911,7 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); - VirtIODevice *vdev = 
virtio_bus_get_device(&proxy->bus); - VirtQueue *vq = virtio_vector_first_queue(vdev, vector); -+ EventNotifier *n; - int ret, index, unmasked = 0; - - while (vq) { -@@ -896,7 +920,8 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - break; - } - if (index < proxy->nvqs_with_notifiers) { -- ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); -+ n = virtio_queue_get_guest_notifier(vq); -+ ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n); - if (ret < 0) { - goto undo; - } -@@ -912,7 +937,8 @@ undo: - while (vq && unmasked >= 0) { - index = virtio_get_queue_index(vq); - if (index < proxy->nvqs_with_notifiers) { -- virtio_pci_vq_vector_mask(proxy, index, vector); -+ n = virtio_queue_get_guest_notifier(vq); -+ virtio_pci_one_vector_mask(proxy, index, vector, n); - --unmasked; - } - vq = virtio_vector_next_queue(vq); -@@ -925,15 +951,17 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) - VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtQueue *vq = virtio_vector_first_queue(vdev, vector); -+ EventNotifier *n; - int index; - - while (vq) { - index = virtio_get_queue_index(vq); -+ n = virtio_queue_get_guest_notifier(vq); - if (!virtio_queue_get_num(vdev, index)) { - break; - } - if (index < proxy->nvqs_with_notifiers) { -- virtio_pci_vq_vector_mask(proxy, index, vector); -+ virtio_pci_one_vector_mask(proxy, index, vector, n); - } - vq = virtio_vector_next_queue(vq); - } -@@ -949,19 +977,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev, - int queue_no; - unsigned int vector; - EventNotifier *notifier; -- VirtQueue *vq; -+ int ret; - - for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { -- if (!virtio_queue_get_num(vdev, queue_no)) { -+ ret = virtio_pci_get_notifier(proxy, queue_no, ¬ifier, &vector); -+ if (ret < 0) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); - if (vector < 
vector_start || vector >= vector_end || - !msix_is_masked(dev, vector)) { - continue; - } -- vq = virtio_get_queue(vdev, queue_no); -- notifier = virtio_queue_get_guest_notifier(vq); - if (k->guest_notifier_pending) { - if (k->guest_notifier_pending(vdev, queue_no)) { - msix_set_pending(dev, vector); --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch b/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch deleted file mode 100644 index be9b3c7..0000000 --- a/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch +++ /dev/null @@ -1,212 +0,0 @@ -From 58cd577ff157cfaf7506bba135db58e75c330ff0 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:44 +0800 -Subject: [PATCH 03/31] virtio-pci: decouple the single vector from the - interrupt process -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/10] 2c79cb678f005fb2f53b2db0f237347634ab3422 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 - -To reuse the interrupt process in configure interrupt -Need to decouple the single vector from the interrupt process. -We add new function kvm_virtio_pci_vector_use_one and _release_one. -These functions are used for the single vector, the whole process will -finish in the loop with vq number. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-4-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit ee3b8dc6cc496ba7f4e27aed4493275c706a7942) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 131 +++++++++++++++++++++++------------------ - 1 file changed, 73 insertions(+), 58 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index 52c7692fff..ec816ea367 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -699,7 +699,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev, - } - - static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, -- unsigned int queue_no, - unsigned int vector) - { - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; -@@ -764,87 +763,103 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, - return 0; - } - --static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) - { -+ unsigned int vector; -+ int ret; -+ EventNotifier *n; - PCIDevice *dev = &proxy->pci_dev; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- unsigned int vector; -- int ret, queue_no; -- EventNotifier *n; -- for (queue_no = 0; queue_no < nvqs; queue_no++) { -- if (!virtio_queue_get_num(vdev, queue_no)) { -- break; -- } -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -- } -- ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector); -+ -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return ret; -+ } -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return 0; -+ } -+ ret = kvm_virtio_pci_vq_vector_use(proxy, vector); -+ if (ret < 0) { -+ goto undo; -+ } -+ /* -+ * If guest supports masking, set up irqfd now. -+ * Otherwise, delay until unmasked in the frontend. 
-+ */ -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - if (ret < 0) { -+ kvm_virtio_pci_vq_vector_release(proxy, vector); - goto undo; - } -- /* If guest supports masking, set up irqfd now. -- * Otherwise, delay until unmasked in the frontend. -- */ -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); -- if (ret < 0) { -- kvm_virtio_pci_vq_vector_release(proxy, vector); -- goto undo; -- } -- } - } -- return 0; - -+ return 0; - undo: -- while (--queue_no >= 0) { -- vector = virtio_queue_vector(vdev, queue_no); -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -+ -+ vector = virtio_queue_vector(vdev, queue_no); -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return ret; -+ } -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return ret; - } -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ } -+ return ret; -+} -+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+{ -+ int queue_no; -+ int ret = 0; -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ -+ for (queue_no = 0; queue_no < nvqs; queue_no++) { -+ if (!virtio_queue_get_num(vdev, queue_no)) { -+ return -1; - } -- kvm_virtio_pci_vq_vector_release(proxy, vector); -+ ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); - } - return ret; - } - --static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+ -+static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, -+ int queue_no) - { -- PCIDevice *dev = &proxy->pci_dev; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - unsigned 
int vector; -- int queue_no; -- VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - EventNotifier *n; -- int ret ; -+ int ret; -+ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -+ PCIDevice *dev = &proxy->pci_dev; -+ -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return; -+ } -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return; -+ } -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ } -+ kvm_virtio_pci_vq_vector_release(proxy, vector); -+} -+ -+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+{ -+ int queue_no; -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -- } -- /* If guest supports masking, clean up irqfd now. -- * Otherwise, it was cleaned when masked in the frontend. -- */ -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, n, vector); -- } -- kvm_virtio_pci_vq_vector_release(proxy, vector); -+ kvm_virtio_pci_vector_release_one(proxy, queue_no); - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch b/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch deleted file mode 100644 index 0555a68..0000000 --- a/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 35ffe28a91a2ef08dd181d1a22695050ccbb6995 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 9 Jan 2023 16:04:43 +0000 -Subject: [PATCH 1/2] virtio-rng-pci: fix migration compat for vectors - -RH-Author: Dr. 
David Alan Gilbert -RH-MergeRequest: 131: virtio-rng-pci: fix migration compat for vectors -RH-Bugzilla: 2155749 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth -RH-Commit: [1/1] 1a866491dd191b073d71ae1aa5f4d76ee885de6d (dagrh/c-9-s-qemu-kvm) - -Fixup the migration compatibility for existing machine types -so that they do not enable msi-x. - -Symptom: - -(qemu) qemu: get_pci_config_device: Bad config data: i=0x34 read: 84 device: 98 cmask: ff wmask: 0 w1cmask:0 -qemu: Failed to load PCIDevice:config -qemu: Failed to load virtio-rng:virtio -qemu: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-rng' -qemu: load of migration failed: Invalid argument - -Note: This fix will break migration from 7.2->7.2-fixed with this patch - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=2155749 -Fixes: 9ea02e8f1 ("virtio-rng-pci: Allow setting nvectors, so we can use MSI-X") - -This downstream fix is the equivalent of an upstream fix I've posted to -the 7.2 machine type compatibility. - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 3d851d34da..7adbac6f87 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -56,6 +56,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "nvme-ns", "eui64-default", "on"}, - /* hw_compat_rhel_9_1 from hw_compat_7_1 */ - { "virtio-device", "queue_reset", "false" }, -+ /* hw_compat_rhel_9_1 bz 2155749 */ -+ { "virtio-rng-pci", "vectors", "0" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch b/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch deleted file mode 100644 index e5288d6..0000000 --- a/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 5413b8825db6eecc6f245854a6bce58e4dee3294 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 7 Feb 2023 17:57:39 +0000 -Subject: [PATCH 20/20] virtio-rng-pci: fix transitional migration compat for - vectors - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 147: virtio-rng-pci: fix transitional migration compat for vectors -RH-Bugzilla: 2162569 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Gerd Hoffmann -RH-Commit: [1/1] 6e2bd111cd56808fccf2c0464a40f7784fd893a2 (dagrh/c-9-s-qemu-kvm) - -In upstream bad9c5a5166/downstream 46e08bafe9ed I fixed the virito-rng-pci -migration compatibility, but it was discovered that we also need to fix -the other aliases of the device for the transitional cases. - -I've sent upstream: -https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg01926.html -but downstream we need to change the downstream machine type anyway, -so it's not quite identical. - -Fixes: 9ea02e8f1 ('virtio-rng-pci: Allow setting nvectors, so we can use MSI-X') - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/core/machine.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 7adbac6f87..3ee638394b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -58,6 +58,9 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "virtio-device", "queue_reset", "false" }, - /* hw_compat_rhel_9_1 bz 2155749 */ - { "virtio-rng-pci", "vectors", "0" }, -+ /* hw_compat_rhel_9_1 bz 2162569 */ -+ { "virtio-rng-pci-transitional", "vectors", "0" }, -+ { "virtio-rng-pci-non-transitional", "vectors", "0" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch b/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch deleted file mode 100644 index c951897..0000000 --- a/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch +++ /dev/null @@ -1,325 +0,0 @@ -From c64027b1ff9856031c01009f4b5c3560d92cc998 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:18 -0500 -Subject: [PATCH 03/12] virtio-scsi: reset SCSI devices from main loop thread - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [3/3] 2a29cb9600709a799daadb4addb58a747ed2e3a3 (stefanha/centos-stream-qemu-kvm) - -When an IOThread is configured, the ctrl virtqueue is processed in the -IOThread. TMFs that reset SCSI devices are currently called directly -from the IOThread and trigger an assertion failure in blk_drain() from -the following call stack: - -virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset --> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain - - ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed. 
- -The blk_drain() function is not designed to be called from an IOThread -because it needs the Big QEMU Lock (BQL). - -This patch defers TMFs that reset SCSI devices to a Bottom Half (BH) -that runs in the main loop thread under the BQL. This way it's safe to -call blk_drain() and the assertion failure is avoided. - -Introduce s->tmf_bh_list for tracking TMF requests that have been -deferred to the BH. When the BH runs it will grab the entire list and -process all requests. Care must be taken to clear the list when the -virtio-scsi device is reset or unrealized. Otherwise deferred TMF -requests could execute later and lead to use-after-free or other -undefined behavior. - -The s->resetting counter that's used by TMFs that reset SCSI devices is -accessed from multiple threads. This patch makes that explicit by using -atomic accessor functions. With this patch applied the counter is only -modified by the main loop thread under the BQL but can be read by any -thread. - -Reported-by: Qing Wang -Cc: Paolo Bonzini -Reviewed-by: Eric Blake -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-4-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 169 +++++++++++++++++++++++++------- - include/hw/virtio/virtio-scsi.h | 11 ++- - 2 files changed, 143 insertions(+), 37 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 6f6e2e32ba..7d27e4c2a1 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -42,13 +42,11 @@ typedef struct VirtIOSCSIReq { - QEMUSGList qsgl; - QEMUIOVector resp_iov; - -- union { -- /* Used for two-stage request submission */ -- QTAILQ_ENTRY(VirtIOSCSIReq) next; -+ /* Used for two-stage request submission and TMFs deferred to BH */ -+ QTAILQ_ENTRY(VirtIOSCSIReq) next; - -- /* Used for cancellation of request during TMFs */ -- int remaining; -- }; -+ /* Used for 
cancellation of request during TMFs */ -+ int remaining; - - SCSIRequest *sreq; - size_t resp_size; -@@ -293,6 +291,122 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) - } - } - -+static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) -+{ -+ VirtIOSCSI *s = req->dev; -+ SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); -+ BusChild *kid; -+ int target; -+ -+ switch (req->req.tmf.subtype) { -+ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: -+ if (!d) { -+ req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; -+ goto out; -+ } -+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { -+ req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; -+ goto out; -+ } -+ qatomic_inc(&s->resetting); -+ device_cold_reset(&d->qdev); -+ qatomic_dec(&s->resetting); -+ break; -+ -+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -+ target = req->req.tmf.lun[1]; -+ qatomic_inc(&s->resetting); -+ -+ rcu_read_lock(); -+ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { -+ SCSIDevice *d1 = SCSI_DEVICE(kid->child); -+ if (d1->channel == 0 && d1->id == target) { -+ device_cold_reset(&d1->qdev); -+ } -+ } -+ rcu_read_unlock(); -+ -+ qatomic_dec(&s->resetting); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ break; -+ } -+ -+out: -+ object_unref(OBJECT(d)); -+ -+ virtio_scsi_acquire(s); -+ virtio_scsi_complete_req(req); -+ virtio_scsi_release(s); -+} -+ -+/* Some TMFs must be processed from the main loop thread */ -+static void virtio_scsi_do_tmf_bh(void *opaque) -+{ -+ VirtIOSCSI *s = opaque; -+ QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); -+ VirtIOSCSIReq *req; -+ VirtIOSCSIReq *tmp; -+ -+ GLOBAL_STATE_CODE(); -+ -+ virtio_scsi_acquire(s); -+ -+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -+ QTAILQ_INSERT_TAIL(&reqs, req, next); -+ } -+ -+ qemu_bh_delete(s->tmf_bh); -+ s->tmf_bh = NULL; -+ -+ virtio_scsi_release(s); -+ -+ QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { -+ 
QTAILQ_REMOVE(&reqs, req, next); -+ virtio_scsi_do_one_tmf_bh(req); -+ } -+} -+ -+static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) -+{ -+ VirtIOSCSIReq *req; -+ VirtIOSCSIReq *tmp; -+ -+ GLOBAL_STATE_CODE(); -+ -+ virtio_scsi_acquire(s); -+ -+ if (s->tmf_bh) { -+ qemu_bh_delete(s->tmf_bh); -+ s->tmf_bh = NULL; -+ } -+ -+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -+ -+ /* SAM-6 6.3.2 Hard reset */ -+ req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; -+ virtio_scsi_complete_req(req); -+ } -+ -+ virtio_scsi_release(s); -+} -+ -+static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) -+{ -+ VirtIOSCSI *s = req->dev; -+ -+ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); -+ -+ if (!s->tmf_bh) { -+ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); -+ qemu_bh_schedule(s->tmf_bh); -+ } -+} -+ - /* Return 0 if the request is ready to be completed and return to guest; - * -EINPROGRESS if the request is submitted and will be completed later, in the - * case of async cancellation. 
*/ -@@ -300,8 +414,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - { - SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); - SCSIRequest *r, *next; -- BusChild *kid; -- int target; - int ret = 0; - - virtio_scsi_ctx_check(s, d); -@@ -358,15 +470,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - break; - - case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: -- if (!d) { -- goto fail; -- } -- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { -- goto incorrect_lun; -- } -- s->resetting++; -- device_cold_reset(&d->qdev); -- s->resetting--; -+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -+ virtio_scsi_defer_tmf_to_bh(req); -+ ret = -EINPROGRESS; - break; - - case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: -@@ -409,22 +515,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - } - break; - -- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -- target = req->req.tmf.lun[1]; -- s->resetting++; -- -- rcu_read_lock(); -- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { -- SCSIDevice *d1 = SCSI_DEVICE(kid->child); -- if (d1->channel == 0 && d1->id == target) { -- device_cold_reset(&d1->qdev); -- } -- } -- rcu_read_unlock(); -- -- s->resetting--; -- break; -- - case VIRTIO_SCSI_T_TMF_CLEAR_ACA: - default: - req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; -@@ -654,7 +744,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r) - if (!req) { - return; - } -- if (req->dev->resetting) { -+ if (qatomic_read(&req->dev->resetting)) { - req->resp.cmd.response = VIRTIO_SCSI_S_RESET; - } else { - req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; -@@ -830,9 +920,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev) - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); - - assert(!s->dataplane_started); -- s->resetting++; -+ -+ virtio_scsi_reset_tmf_bh(s); -+ -+ qatomic_inc(&s->resetting); - bus_cold_reset(BUS(&s->bus)); -- s->resetting--; -+ qatomic_dec(&s->resetting); - - vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; 
- vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; -@@ -1052,6 +1145,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) - VirtIOSCSI *s = VIRTIO_SCSI(dev); - Error *err = NULL; - -+ QTAILQ_INIT(&s->tmf_bh_list); -+ - virtio_scsi_common_realize(dev, - virtio_scsi_handle_ctrl, - virtio_scsi_handle_event, -@@ -1089,6 +1184,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) - { - VirtIOSCSI *s = VIRTIO_SCSI(dev); - -+ virtio_scsi_reset_tmf_bh(s); -+ - qbus_set_hotplug_handler(BUS(&s->bus), NULL); - virtio_scsi_common_unrealize(dev); - } -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index a36aad9c86..1c1cd77d6e 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -75,13 +75,22 @@ struct VirtIOSCSICommon { - VirtQueue **cmd_vqs; - }; - -+struct VirtIOSCSIReq; -+ - struct VirtIOSCSI { - VirtIOSCSICommon parent_obj; - - SCSIBus bus; -- int resetting; -+ int resetting; /* written from main loop thread, read from any thread */ - bool events_dropped; - -+ /* -+ * TMFs deferred to main loop BH. These fields are protected by -+ * virtio_scsi_acquire(). 
-+ */ -+ QEMUBH *tmf_bh; -+ QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; -+ - /* Fields for dataplane below */ - AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ - --- -2.39.1 - diff --git a/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch b/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch deleted file mode 100644 index d797023..0000000 --- a/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 3f55d12df35552ae948587a62d6f9015664adc13 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:12 +0100 -Subject: [PATCH 1/9] virtio_net: Modify virtio_net_get_config to early return -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Jason Wang -RH-Commit: [1/4] 4f5e79afd54e157f32e6fff56ae33e2b71492525 (eperezmartin/qemu-kvm) - -Next patches introduce more code on vhost-vdpa branch, with already have -too much indentation. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Michael S. Tsirkin -Acked-by: Jason Wang -Message-Id: <20221221115015.1400889-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit ebc141a62508dc91901373c1a19fe7e2cf560dfb) ---- - hw/net/virtio-net.c | 28 +++++++++++++++------------- - 1 file changed, 15 insertions(+), 13 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index ec974f7a76..5935e55653 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -168,20 +168,22 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) - if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, - n->config_size); -- if (ret != -1) { -- /* -- * Some NIC/kernel combinations present 0 as the mac address. As -- * that is not a legal address, try to proceed with the -- * address from the QEMU command line in the hope that the -- * address has been configured correctly elsewhere - just not -- * reported by the device. -- */ -- if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { -- info_report("Zero hardware mac address detected. Ignoring."); -- memcpy(netcfg.mac, n->mac, ETH_ALEN); -- } -- memcpy(config, &netcfg, n->config_size); -+ if (ret == -1) { -+ return; - } -+ -+ /* -+ * Some NIC/kernel combinations present 0 as the mac address. As that -+ * is not a legal address, try to proceed with the address from the -+ * QEMU command line in the hope that the address has been configured -+ * correctly elsewhere - just not reported by the device. -+ */ -+ if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { -+ info_report("Zero hardware mac address detected. 
Ignoring."); -+ memcpy(netcfg.mac, n->mac, ETH_ALEN); -+ } -+ -+ memcpy(config, &netcfg, n->config_size); - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch b/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch deleted file mode 100644 index 866957c..0000000 --- a/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch +++ /dev/null @@ -1,46 +0,0 @@ -From b3d728b53abaae0c9884dfb5e9c216b1088196e3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:13 +0100 -Subject: [PATCH 2/9] virtio_net: copy VIRTIO_NET_S_ANNOUNCE if device model - has it -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Jason Wang -RH-Commit: [2/4] fb04186829eb93bab3c9ececf90fa5b035ffa2ec (eperezmartin/qemu-kvm) - -Status part of the emulated feature. It will follow device model, so we -must copy it as long as NIC device model has it set. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221221115015.1400889-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Jason Wang -(cherry picked from commit 4f93aafc8f9d731c6588f5dc5594c6a1dd1fbe66) ---- - hw/net/virtio-net.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 5935e55653..948bcf33cf 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -183,6 +183,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) - memcpy(netcfg.mac, n->mac, ETH_ALEN); - } - -+ netcfg.status |= virtio_tswap16(vdev, -+ n->status & VIRTIO_NET_S_ANNOUNCE); - memcpy(config, &netcfg, n->config_size); - } - } --- -2.31.1 - diff --git a/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch b/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch new file mode 100644 index 0000000..e1eef6d --- /dev/null +++ b/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch @@ -0,0 +1,58 @@ +From 63e2339a6f38706c6fc5eb251426812520db6a6d Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:37 -0400 +Subject: [PATCH 03/56] vl.c: Create late backends before migration object +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [2/50] 7209bb94faa48650388be8fef08c77afd26517d8 (peterx/qemu-kvm) + +The migration object may want to check against different types of memory +when initialized. Delay the creation to be after late backends. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: David Hildenbrand +Signed-off-by: Juan Quintela +(cherry picked from commit cb9d8b8ce1aaf38f53295fc59ec1b8b7eb4338d2) +Signed-off-by: Peter Xu +--- + softmmu/vl.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index ad4173138d..a44b49430b 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -3592,14 +3592,19 @@ void qemu_init(int argc, char **argv) + machine_class->name, machine_class->deprecation_reason); + } + ++ /* ++ * Create backends before creating migration objects, so that it can ++ * check against compatibilities on the backend memories (e.g. postcopy ++ * over memory-backend-file objects). ++ */ ++ qemu_create_late_backends(); ++ + /* + * Note: creates a QOM object, must run only after global and + * compat properties have been set up. + */ + migration_object_init(); + +- qemu_create_late_backends(); +- + /* parse features once if machine provides default cpu_type */ + current_machine->cpu_type = machine_class->default_cpu_type; + if (cpu_option) { +-- +2.39.1 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 848722b..b861e9d 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -100,7 +100,7 @@ %endif %global target_list %{kvm_target}-softmmu -%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress +%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress,virtio-blk-vhost-vdpa,virtio-blk-vfio-pci,virtio-blk-vhost-user,io_uring,nvme-io_uring %global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https %define qemudocdir %{_docdir}/%{name} %global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios" @@ -125,6 +125,7 @@ Requires: %{name}-device-usb-host = %{epoch}:%{version}-%{release} \ %if %{have_usbredir} \ Requires: 
%{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \ %endif \ +Requires: %{name}-block-blkio = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release} @@ -147,8 +148,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 7.2.0 -Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.5 +Version: 8.0.0 +Release: 16%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -184,256 +185,377 @@ Patch0011: 0011-Enable-make-check.patch Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0015: 0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0016: 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0018: 0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch -Patch0019: 0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch -Patch0020: 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch -Patch0021: 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch -Patch0022: 0022-x86-rhel-9.2.0-machine-type.patch -Patch23: kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch24: kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch25: kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch26: kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch -# For bz#2113840 - [RHEL9.2] Memory mapping 
optimization for virt machine -Patch27: kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch28: kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch29: kvm-hw-arm-virt-Add-compact-highmem-property.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch -# For bz#2155749 - [regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X -Patch32: kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch33: kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch34: kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch35: kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch36: kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch37: kvm-vhost-vdpa-add-support-for-config-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch38: kvm-virtio-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch39: kvm-vhost-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch40: kvm-virtio-net-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa 
qemu -Patch41: kvm-virtio-mmio-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch42: kvm-virtio-pci-add-support-for-configure-interrupt.patch -# For bz#2159408 - [s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8 -Patch43: kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch -# For bz#2124856 - VM with virtio interface and iommu=on will crash when try to migrate -Patch44: kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch45: kvm-block-drop-bdrv_remove_filter_or_cow_child.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch46: kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch47: kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch48: kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch49: kvm-block-Remove-drained_end_counter.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch50: kvm-block-Inline-bdrv_drain_invoke.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch51: kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch52: 
kvm-block-Drain-individual-nodes-during-reopen.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch53: kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch54: kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch55: kvm-block-Remove-subtree-drains.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch56: kvm-block-Call-drain-callbacks-only-once.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch57: kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch58: kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch59: kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch60: kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch61: kvm-accel-introduce-accelerator-blocker-API.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch62: kvm-KVM-keep-track-of-running-ioctls.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch63: 
kvm-kvm-Atomic-memslot-updates.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch64: kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch65: kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch66: kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch67: kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch -# For bz#2122523 - Secure guest can't boot with maximal number of vcpus (248) -Patch68: kvm-s390x-pv-Implement-a-CGS-check-helper.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch69: kvm-s390x-pci-coalesce-unmap-operations.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch70: kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch71: kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch -# For bz#2149191 - [RFE][guest-agent] - USB bus type support -Patch72: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch73: kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch74: kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch75: kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch76: kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch77: kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch78: kvm-vdpa-request-iova_range-only-once.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch79: kvm-vdpa-move-SVQ-vring-features-check-to-net.patch -# For 
bz#2104412 - vDPA ASID support in Qemu -Patch80: kvm-vdpa-allocate-SVQ-array-unconditionally.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch81: kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch82: kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch83: kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch84: kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch85: kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch86: kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch87: kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch88: kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch89: kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch -# For bz#2165280 - [kvm-unit-tests] debug-wp-migration fails -Patch90: kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch91: kvm-block-Improve-empty-format-specific-info-dump.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch92: kvm-block-file-Add-file-specific-image-info.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch93: kvm-block-vmdk-Change-extent-info-type.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch94: 
kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch95: kvm-qemu-img-Use-BlockNodeInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch96: kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch97: kvm-block-qapi-Introduce-BlockGraphInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch98: kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch99: kvm-iotests-Filter-child-node-information.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch100: kvm-iotests-106-214-308-Read-only-one-size-line.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch101: kvm-qemu-img-Let-info-print-block-graph.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch102: kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch -# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace -Patch103: kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch -# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace -Patch104: kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch -# For bz#2162569 - [transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2 -Patch105: kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch -# For bz#2169232 - RFE: reconnect option for stream socket back-end -Patch106: kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch -# For bz#2169232 - RFE: reconnect option for stream socket back-end -Patch107: kvm-net-stream-add-a-new-option-to-automatically-reconne.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd 
over userfaultfd(2) syscall -Patch108: kvm-linux-headers-Update-to-v6.1.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall -Patch109: kvm-util-userfaultfd-Add-uffd_open.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall -Patch110: kvm-util-userfaultfd-Support-dev-userfaultfd.patch -# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race -Patch111: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch -# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race -Patch112: kvm-migration-check-magic-value-for-deciding-the-mapping.patch -# For bz#2168172 - [s390x] qemu-kvm coredumps when SE crashes -Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch -# For bz#2168209 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch114: kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch -# For bz#2169904 - [SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022 -Patch115: kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch116: kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch117: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch118: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch119: kvm-qatomic-add-smp_mb__before-after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch120: kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch121: 
kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch122: kvm-edu-add-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch123: kvm-aio-wait-switch-to-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch124: kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch125: kvm-physmem-add-missing-memory-barrier.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch126: kvm-async-update-documentation-of-the-memory-barriers.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch127: kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch128: kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch129: kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch130: kvm-target-i386-Fix-BEXTR-instruction.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch131: kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch132: kvm-target-i386-fix-ADOX-followed-by-ADCX.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch133: kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch134: kvm-target-i386-Fix-BZHI-instruction.patch -# For bz#2156876 - [virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22) -Patch135: 
kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch -# For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z] -Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch -# For bz#2213864 - [mlx vhost_vdpa][rhel 9.2]qemu core dump when hot unplug then hotplug a vdpa interface with multi-queue setting [rhel-9.2.0.z] -Patch137: kvm-vdpa-stop-all-svq-on-device-deletion.patch -# For bz#2221219 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) [rhel-9.2.0.z] -Patch138: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch -# For bz#2211923 - [qemu-kvm] rhel guest failed boot with multi disks on error Failed to start udev Wait for Complete Device Initialization [rhel-9.2.0.z] -Patch139: kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch -# For bz#2227721 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest [rhel-9.2.0.z] -Patch140: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch -# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] -Patch141: kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch -# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] -Patch142: kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch -# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] -Patch143: kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch -# For bz#2216503 - CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.2.0.z] -Patch144: kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch +Patch0015: 
0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0016: 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +Patch0017: 0017-Add-RHEL-9.2.0-compat-structure.patch +Patch0018: 0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch +Patch0019: 0019-Disable-unwanted-new-devices.patch +# For bz#2087047 - Disk detach is unsuccessful while the guest is still booting +Patch20: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch +# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs +Patch21: kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch +# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs +Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch23: kvm-migration-Handle-block-device-inactivation-failures-.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch24: kvm-migration-Minor-control-flow-simplification.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch25: kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch26: kvm-util-mmap-alloc-qemu_fd_getfs.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch27: kvm-vl.c-Create-late-backends-before-migration-object.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch28: kvm-migration-postcopy-Detect-file-system-on-dest-host.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch29: kvm-migration-mark-mixed-functions-that-can-suspend.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch30: kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared 
memory +Patch31: kvm-migration-remove-extra-whitespace-character-for-code.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch32: kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch33: kvm-migration-Update-atomic-stats-out-of-the-mutex.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch34: kvm-migration-Make-multifd_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch35: kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch36: kvm-migration-Make-precopy_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch37: kvm-migration-Make-downtime_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch38: kvm-migration-Make-dirty_sync_count-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch39: kvm-migration-Make-postcopy_requests-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch40: kvm-migration-Rename-duplicate-to-zero_pages.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch41: kvm-migration-Rename-normal-to-normal_pages.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch42: kvm-migration-rename-enabled_capabilities-to-capabilitie.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch43: kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch44: kvm-migration-move-migration_global_dump-to-migration-hm.patch +# For bz#2057267 - Migration with 
postcopy fail when vm set with shared memory +Patch45: kvm-spice-move-client_migrate_info-command-to-ui.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch46: kvm-migration-Create-migrate_cap_set.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch47: kvm-migration-Create-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch48: kvm-migration-Move-migrate_colo_enabled-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch49: kvm-migration-Move-migrate_use_compression-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch50: kvm-migration-Move-migrate_use_events-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch51: kvm-migration-Move-migrate_use_multifd-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch52: kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch53: kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch54: kvm-migration-Move-migrate_use_block-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch55: kvm-migration-Move-migrate_use_return-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch56: kvm-migration-Create-migrate_rdma_pin_all-function.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch57: kvm-migration-Move-migrate_caps_check-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch58: 
kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch59: kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch60: kvm-migration-Move-migrate_cap_set-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch61: kvm-migration-Move-parameters-functions-to-option.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch62: kvm-migration-Use-migrate_max_postcopy_bandwidth.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch63: kvm-migration-Move-migrate_use_block_incremental-to-opti.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch64: kvm-migration-Create-migrate_throttle_trigger_threshold.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch65: kvm-migration-Create-migrate_checkpoint_delay.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch66: kvm-migration-Create-migrate_max_cpu_throttle.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch67: kvm-migration-Move-migrate_announce_params-to-option.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch68: kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch69: kvm-migration-Create-migrate_cpu_throttle_increment-func.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch70: kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch71: kvm-migration-Move-migrate_postcopy-to-options.c.patch +# 
For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch72: kvm-migration-Create-migrate_max_bandwidth-function.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch73: kvm-migration-Move-migrate_use_tls-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch74: kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch75: kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch76: kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch77: kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch78: kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch79: kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch80: kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch81: kvm-graph-lock-Disable-locking-for-now.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch82: kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch83: kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch84: kvm-memory-prevent-dma-reentracy-issues.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: 
DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch85: kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch86: kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch87: kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch88: kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch89: kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch90: kvm-raven-disable-reentrancy-detection-for-iomem.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch91: kvm-apic-disable-reentrancy-detection-for-apic-msi.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch92: kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch93: kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch94: kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] 
+Patch95: kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch96: kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch +# For bz#2189423 - Failed to migrate VM from rhel 9.3 to rhel 9.2 +Patch97: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch +# For bz#2196289 - Fix number of ready channels on multifd +Patch98: kvm-multifd-Fix-the-number-of-channels-ready.patch +# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part +Patch99: kvm-util-async-teardown-wire-up-query-command-line-optio.patch +# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part +Patch100: kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch +# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode +Patch101: kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch +# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode +Patch102: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch +# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers +Patch103: kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch +# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers +Patch104: kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch105: kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch106: kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch107: 
kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch +# For RHEL-330 - [virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed +Patch108: kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch +# For bz#2218644 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) +Patch109: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +# For bz#2128929 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest +Patch110: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" +# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package +Patch111: kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch +# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" +# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package +Patch112: kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch113: kvm-vfio-pci-add-support-for-VF-token.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch114: kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch115: kvm-vfio-pci-Static-Resizable-BAR-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch116: kvm-vfio-pci-Fix-a-use-after-free-issue.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch117: kvm-util-vfio-helpers-Use-g_file_read_link.patch +# For 
bz#2192818 - [VFIO LM] Live migration +Patch118: kvm-migration-Make-all-functions-check-have-the-same-for.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch119: kvm-migration-Move-migration_properties-to-options.c.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch120: kvm-migration-Add-switchover-ack-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch121: kvm-migration-Implement-switchover-ack-logic.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch122: kvm-migration-Enable-switchover-ack-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch123: kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch124: kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch125: kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch126: kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch127: kvm-vfio-Implement-a-common-device-info-helper.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch128: kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch129: kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch130: kvm-vfio-migration-Reset-bytes_transferred-properly.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch131: kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch132: kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch133: kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch134: kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch135: 
kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch136: kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch137: kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch138: kvm-vfio-migration-Remove-print-of-Migration-disabled.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch139: kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch140: kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch141: kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch142: kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch143: kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch144: kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch +# For bz#2222579 - PNG screendump doesn't save screen correctly +Patch145: kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch +# For bz#2213317 - Enable libblkio-based block drivers in QEMU +Patch146: kvm-block-blkio-fix-module_block.py-parsing.patch +# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly +Patch147: kvm-scsi-fetch-unit-attention-when-creating-the-request.patch +# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly +Patch148: kvm-scsi-cleanup-scsi_clear_unit_attention.patch +# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly +Patch149: 
kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch +# For RHEL-794 - Backport s390x fixes from QEMU 8.1 +Patch150: kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch +# For bz#2196295 - Multifd flushes its channels 10 times per second +Patch151: kvm-multifd-Create-property-multifd-flush-after-each-sec.patch +# For bz#2196295 - Multifd flushes its channels 10 times per second +Patch152: kvm-multifd-Protect-multifd_send_sync_main-calls.patch +# For bz#2196295 - Multifd flushes its channels 10 times per second +Patch153: kvm-multifd-Only-flush-once-each-full-round-of-memory.patch +# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface +Patch154: kvm-net-socket-prepare-to-cleanup-net_init_socket.patch +# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface +Patch155: kvm-net-socket-move-fd-type-checking-to-its-own-function.patch +# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface +Patch156: kvm-net-socket-remove-net_init_socket.patch +# For bz#2215819 - Migration test failed while guest with PCIe devices +Patch157: kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch158: kvm-util-iov-Make-qiov_slice-public.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch159: kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch160: kvm-util-iov-Remove-qemu_iovec_init_extended.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch161: 
kvm-iotests-iov-padding-New-test.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch162: kvm-block-Fix-pad_request-s-request-restriction.patch +# For RHEL-573 - [mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature" +Patch163: kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch +# For bz#2040509 - [RFE]:Add support for changing "tx_queue_size" to a setable value +Patch164: kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch +# For bz#2223691 - [machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2 +Patch165: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch +# For bz#2141965 - [TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region +Patch166: kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch167: kvm-block-blkio-enable-the-completion-eventfd.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch168: kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch169: kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch170: kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch +# For bz#2225354 - 
[vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch171: kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch172: kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch +# For bz#2229133 - Backport some virtio-iommu and smmu fixes +Patch173: kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch +# For bz#2229133 - Backport some virtio-iommu and smmu fixes +Patch174: kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch +# For bz#2229133 - Backport some virtio-iommu and smmu fixes +Patch175: kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch +# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes +Patch176: kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch +# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes +Patch177: kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch +# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes +Patch178: kvm-i386-sev-Update-checks-and-information-related-to-re.patch +# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes +Patch179: kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch180: kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch181: kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch182: kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch183: kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch +# For 
bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch184: kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch185: kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch186: kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch187: kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch188: kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch189: kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch190: kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch191: kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch192: kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch193: kvm-vdpa-remove-net-cvq-migration-blocker.patch +# For bz#2216504 - CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.3.0] +Patch194: kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch +# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device +Patch195: kvm-migration-Add-migration-prefix-to-functions-in-targe.patch +# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device +Patch196: kvm-migration-Move-more-initializations-to-migrate_init.patch +# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device +Patch197: 
kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch +# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device +Patch198: kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch %if %{have_clang} BuildRequires: clang @@ -450,6 +572,8 @@ BuildRequires: glib2-devel BuildRequires: gnutls-devel BuildRequires: cyrus-sasl-devel BuildRequires: libaio-devel +BuildRequires: libblkio-devel +BuildRequires: liburing-devel BuildRequires: python3-devel BuildRequires: libattr-devel BuildRequires: libusbx-devel >= %{libusbx_version} @@ -636,6 +760,17 @@ Install this package if you want access to the avocado_qemu tests, or qemu-iotests. +%package block-blkio +Summary: QEMU libblkio block drivers +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description block-blkio +This package provides the additional libblkio block drivers for QEMU. + +Install this package if you want to use virtio-blk-vdpa-blk, +virtio-blk-vfio-pci, virtio-blk-vhost-user, io_uring, and nvme-io_uring block +drivers provided by libblkio. + + %package block-curl Summary: QEMU CURL block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} @@ -732,6 +867,19 @@ Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} This package provides usbredir support. %endif +%package ui-dbus +Summary: QEMU D-Bus UI driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description ui-dbus +This package provides the additional D-Bus UI for QEMU. + +%package audio-dbus +Summary: QEMU D-Bus audio driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-ui-dbus = %{epoch}:%{version}-%{release} +%description audio-dbus +This package provides the additional D-Bus audio driver for QEMU. 
+ %prep %setup -q -n qemu-%{version}%{?rcstr} %autopatch -p1 @@ -752,6 +900,7 @@ ulimit -n 10240 --disable-auth-pam \\\ --disable-avx2 \\\ --disable-avx512f \\\ + --disable-avx512bw \\\ --disable-blkio \\\ --disable-block-drv-whitelist-in-tools \\\ --disable-bochs \\\ @@ -795,6 +944,7 @@ ulimit -n 10240 --disable-kvm \\\ --disable-l2tpv3 \\\ --disable-libdaxctl \\\ + --disable-libdw \\\ --disable-libiscsi \\\ --disable-libnfs \\\ --disable-libpmem \\\ @@ -866,7 +1016,6 @@ ulimit -n 10240 --disable-vhost-vdpa \\\ --disable-virglrenderer \\\ --disable-virtfs \\\ - --disable-virtiofsd \\\ --disable-vnc \\\ --disable-vnc-jpeg \\\ --disable-png \\\ @@ -901,7 +1050,7 @@ run_configure() { --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ --meson="%{__meson}" \ - --enable-trace-backend=dtrace \ + --enable-trace-backends=dtrace \ --with-coroutine=ucontext \ --with-git=git \ --tls-priority=@QEMU,SYSTEM \ @@ -928,15 +1077,18 @@ run_configure \ --block-drv-ro-whitelist=%{block_drivers_ro_list} \ %endif --enable-attr \ + --enable-blkio \ --enable-cap-ng \ --enable-capstone \ --enable-coroutine-pool \ --enable-curl \ + --enable-dbus-display \ --enable-debug-info \ --enable-docs \ %if %{have_fdt} --enable-fdt=system \ %endif + --enable-gio \ --enable-gnutls \ --enable-guest-agent \ --enable-iconv \ @@ -947,6 +1099,7 @@ run_configure \ --enable-libusb \ --enable-libudev \ --enable-linux-aio \ + --enable-linux-io-uring \ --enable-lzo \ --enable-malloc-trim \ --enable-modules \ @@ -978,7 +1131,6 @@ run_configure \ --enable-usb-redir \ %endif --enable-vdi \ - --enable-virtiofsd \ --enable-vhost-kernel \ --enable-vhost-net \ --enable-vhost-user \ @@ -1114,10 +1266,6 @@ popd mkdir -p %{buildroot}%{_datadir}/systemtap/tapset -# Move vhost-user JSON files to the standard "qemu" directory -mkdir -p %{buildroot}%{_datadir}/qemu -mv %{buildroot}%{_datadir}/%{name}/vhost-user %{buildroot}%{_datadir}/qemu/ - install -m 0755 
%{qemu_kvm_build}/%{kvm_target}-softmmu/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm install -m 0644 %{qemu_kvm_build}/qemu-kvm.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-log.stp %{buildroot}%{_datadir}/systemtap/tapset/ @@ -1133,7 +1281,6 @@ rm %{buildroot}%{_bindir}/qemu-system-%{kvm_target} rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp -rm %{buildroot}%{_bindir}/elf2dmp # Install simpletrace install -m 0755 scripts/simpletrace.py %{buildroot}%{_datadir}/%{name}/simpletrace.py @@ -1273,17 +1420,13 @@ rm -rf %{buildroot}%{qemudocdir}/specs # endif !tools_only %endif -# Remove virtiofsd (we use separate package for virtiofsd) -rm -rf %{buildroot}%{_mandir}/man1/virtiofsd.1* -rm -rf %{buildroot}%{_libexecdir}/virtiofsd -rm -rf %{buildroot}%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json - %check %if !%{tools_only} pushd %{qemu_kvm_build} echo "Testing %{name}-build" -%make_build check +#%make_build check +make V=1 check popd # endif !tools_only @@ -1349,6 +1492,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_bindir}/qemu-keymap %{_bindir}/qemu-edid %{_bindir}/qemu-trace-stap +%{_bindir}/elf2dmp %{_datadir}/%{name}/simpletrace.py* %{_datadir}/%{name}/tracetool/*.py* %{_datadir}/%{name}/tracetool/backend/*.py* @@ -1436,6 +1580,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{testsdir} %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so +%files block-blkio +%{_libdir}/%{name}/block-blkio.so + %files block-curl %{_libdir}/%{name}/block-curl.so %if %{have_block_rbd} @@ -1460,42 +1607,322 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_libdir}/%{name}/hw-usb-redirect.so %endif +%files audio-dbus +%{_libdir}/%{name}/audio-dbus.so + +%files ui-dbus +%{_libdir}/%{name}/ui-dbus.so 
+ # endif !tools_only %endif %changelog -* Mon Aug 21 2023 Miroslav Rezanina - 7.2.0-14.el9_2.5 -- kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch [RHEL-1060] -- kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch [RHEL-1060] -- kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch [RHEL-1060] -- kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch [bz#2216503] -- Resolves: RHEL-1060 - ([vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z]) -- Resolves: bz#2216503 - (CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.2.0.z]) - -* Mon Aug 07 2023 Miroslav Rezanina - 7.2.0-14.el9_2.4 -- kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch [bz#2211923] -- kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2227721] -- Resolves: bz#2211923 - ([qemu-kvm] rhel guest failed boot with multi disks on error Failed to start udev Wait for Complete Device Initialization [rhel-9.2.0.z]) -- Resolves: bz#2227721 - ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest [rhel-9.2.0.z]) - -* Tue Jul 11 2023 Miroslav Rezanina - 7.2.0-14.el9_2.3 -- kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2221219] -- Resolves: bz#2221219 - (query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) [rhel-9.2.0.z]) - -* Mon Jun 19 2023 Miroslav Rezanina - 7.2.0-14.el9_2.2 -- kvm-vdpa-stop-all-svq-on-device-deletion.patch [bz#2213864] -- Resolves: bz#2213864 - ([mlx vhost_vdpa][rhel 9.2]qemu core dump when hot unplug then hotplug a vdpa interface with multi-queue setting [rhel-9.2.0.z]) - -* Thu May 25 2023 Miroslav Rezanina - 7.2.0-14.el9_2.1 -- kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch [bz#2203745] -- Resolves: bz#2203745 - (Disk detach is 
unsuccessful while the guest is still booting [rhel-9.2.0.z]) +* Mon Sep 18 2023 Miroslav Rezanina - 8.0.0-16.el9_3 +- kvm-migration-Add-migration-prefix-to-functions-in-targe.patch [bz#2229868] +- kvm-migration-Move-more-initializations-to-migrate_init.patch [bz#2229868] +- kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch [bz#2229868] +- kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch [bz#2229868] +- Resolves: bz#2229868 + ([vfio migration]Disable postcopy for VM with migratable vfio device) + +* Fri Sep 15 2023 Miroslav Rezanina - 8.0.0-15.el9_3 +- kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch [bz#2216504] +- Resolves: bz#2216504 + (CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.3.0]) + +* Thu Aug 24 2023 Miroslav Rezanina - 8.0.0-13 +- kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch [RHEL-923] +- kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch [RHEL-923] +- kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch [RHEL-923] +- kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch [RHEL-923] +- kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch [RHEL-923] +- kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch [RHEL-923] +- kvm-vdpa-remove-net-cvq-migration-blocker.patch [RHEL-923] +- Resolves: RHEL-923 + (vhost shadow virtqueue: state restore through CVQ) + +* Mon Aug 21 2023 Miroslav Rezanina - 8.0.0-12 +- kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch [bz#2094913] +- kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch [bz#2094913] +- kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch [bz#2094913] +- kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch [bz#2094913] +- kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch [bz#2094913] +- kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch [bz#2094913] +- 
kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch [bz#2094913] +- Resolves: bz#2094913 + (Add EPYC-Genoa CPU model in qemu) + +* Mon Aug 07 2023 Miroslav Rezanina - 8.0.0-11 +- kvm-block-blkio-enable-the-completion-eventfd.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch [bz#2225354 bz#2225439] +- kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch [bz#2229133] +- kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch [bz#2229133] +- kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch [bz#2229133] +- kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch [bz#2214839] +- kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch [bz#2214839] +- kvm-i386-sev-Update-checks-and-information-related-to-re.patch [bz#2214839] +- kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch [bz#2214839] +- kvm-Provide-elf2dmp-binary-in-qemu-tools.patch [bz#2165917] +- Resolves: bz#2225354 + ([vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting) +- Resolves: bz#2225439 + ([vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa) +- Resolves: bz#2229133 + (Backport some virtio-iommu and smmu fixes) +- Resolves: bz#2214839 + ([AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes) +- Resolves: bz#2165917 + (qemu-kvm: contrib/elf2dmp: Windows Server 2022 support) + +* Mon Jul 31 2023 Miroslav Rezanina - 8.0.0-10 +- kvm-util-iov-Make-qiov_slice-public.patch [bz#2174676] +- kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch [bz#2174676] +- kvm-util-iov-Remove-qemu_iovec_init_extended.patch 
[bz#2174676] +- kvm-iotests-iov-padding-New-test.patch [bz#2174676] +- kvm-block-Fix-pad_request-s-request-restriction.patch [bz#2174676] +- kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch [RHEL-573] +- kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch [bz#2040509] +- kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch [bz#2223691] +- kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch [bz#2141965] +- Resolves: bz#2174676 + (Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9]) +- Resolves: RHEL-573 + ([mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature") +- Resolves: bz#2040509 + ([RFE]:Add support for changing "tx_queue_size" to a setable value) +- Resolves: bz#2223691 + ([machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2) +- Resolves: bz#2141965 + ([TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region) + +* Mon Jul 24 2023 Miroslav Rezanina - 8.0.0-9 +- kvm-scsi-fetch-unit-attention-when-creating-the-request.patch [bz#2176702] +- kvm-scsi-cleanup-scsi_clear_unit_attention.patch [bz#2176702] +- kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch [bz#2176702] +- kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch [RHEL-794] +- kvm-multifd-Create-property-multifd-flush-after-each-sec.patch [bz#2196295] +- kvm-multifd-Protect-multifd_send_sync_main-calls.patch [bz#2196295] +- kvm-multifd-Only-flush-once-each-full-round-of-memory.patch [bz#2196295] +- kvm-net-socket-prepare-to-cleanup-net_init_socket.patch [RHEL-582] +- kvm-net-socket-move-fd-type-checking-to-its-own-function.patch [RHEL-582] +- kvm-net-socket-remove-net_init_socket.patch [RHEL-582] +- kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch [bz#2215819] +- kvm-spec-Build-DBUS-display.patch [bz#2207940] +- Resolves: bz#2176702 + 
([RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly) +- Resolves: RHEL-794 + (Backport s390x fixes from QEMU 8.1) +- Resolves: bz#2196295 + (Multifd flushes its channels 10 times per second) +- Resolves: RHEL-582 + ([passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface) +- Resolves: bz#2215819 + (Migration test failed while guest with PCIe devices) +- Resolves: bz#2207940 + ([RFE] Enable qemu-ui-dbus subpackage) + +* Mon Jul 17 2023 Miroslav Rezanina - 8.0.0-8 +- kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch [bz#2211609 bz#2211634] +- kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch [bz#2211609 bz#2211634] +- kvm-vfio-pci-add-support-for-VF-token.patch [bz#2192818] +- kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch [bz#2192818] +- kvm-vfio-pci-Static-Resizable-BAR-capability.patch [bz#2192818] +- kvm-vfio-pci-Fix-a-use-after-free-issue.patch [bz#2192818] +- kvm-util-vfio-helpers-Use-g_file_read_link.patch [bz#2192818] +- kvm-migration-Make-all-functions-check-have-the-same-for.patch [bz#2192818] +- kvm-migration-Move-migration_properties-to-options.c.patch [bz#2192818] +- kvm-migration-Add-switchover-ack-capability.patch [bz#2192818] +- kvm-migration-Implement-switchover-ack-logic.patch [bz#2192818] +- kvm-migration-Enable-switchover-ack-capability.patch [bz#2192818] +- kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch [bz#2192818] +- kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch [bz#2192818] +- kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch [bz#2192818] +- kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch [bz#2192818] +- kvm-vfio-Implement-a-common-device-info-helper.patch [bz#2192818] +- kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch [bz#2192818] +- kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch [bz#2192818] +- 
kvm-vfio-migration-Reset-bytes_transferred-properly.patch [bz#2192818] +- kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch [bz#2192818] +- kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch [bz#2192818] +- kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch [bz#2192818] +- kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch [bz#2192818] +- kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch [bz#2192818] +- kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch [bz#2192818] +- kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch [bz#2192818] +- kvm-vfio-migration-Remove-print-of-Migration-disabled.patch [bz#2192818] +- kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch [bz#2192818] +- kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch [bz#2192818] +- kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch [bz#2220866] +- kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch [bz#2222579] +- kvm-block-blkio-fix-module_block.py-parsing.patch [bz#2213317] +- kvm-Fix-virtio-blk-vhost-vdpa-typo-in-spec-file.patch [bz#2213317] +- Resolves: bz#2211609 + (With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000") +- Resolves: bz#2211634 + ([aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package) +- Resolves: bz#2192818 + ([VFIO LM] Live migration) +- Resolves: bz#2220866 + (Misaligned symbol for s390-ccw image during qemu-kvm build) +- Resolves: bz#2222579 + (PNG screendump doesn't save screen correctly) +- Resolves: bz#2213317 + (Enable libblkio-based block drivers in QEMU) + +* Mon Jul 10 2023 Miroslav Rezanina - 8.0.0-7 +- 
kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch [bz#2171363] +- kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch [bz#2171363] +- kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch [bz#2171363] +- kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch [RHEL-330] +- kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2218644] +- kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2128929] +- Resolves: bz#2171363 + ([aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association) +- Resolves: RHEL-330 + ([virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed) +- Resolves: bz#2218644 + (query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone)) +- Resolves: bz#2128929 + ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest) + +* Mon Jun 26 2023 Miroslav Rezanina - 8.0.0-6 +- kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch [bz#2216201] +- kvm-target-i386-add-support-for-FB_CLEAR-feature.patch [bz#2216201] +- kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch [bz#2180076] +- kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch [bz#2180076] +- kvm-Enable-libblkio-block-drivers.patch [bz#2213317] +- Resolves: bz#2216201 + ([qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode) +- Resolves: bz#2180076 + ([qemu-kvm] support fd passing for libblkio QEMU BlockDrivers) +- Resolves: bz#2213317 + (Enable libblkio-based block drivers in QEMU) + +* Tue Jun 13 2023 Miroslav Rezanina - 8.0.0-5 +- kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch [bz#2186725] +- kvm-graph-lock-Disable-locking-for-now.patch [bz#2186725] +- 
kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch [bz#2186725] +- kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch [bz#2186725] +- kvm-memory-prevent-dma-reentracy-issues.patch [RHEL-516] +- kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch [RHEL-516] +- kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch [RHEL-516] +- kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch [RHEL-516] +- kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch [RHEL-516] +- kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch [RHEL-516] +- kvm-raven-disable-reentrancy-detection-for-iomem.patch [RHEL-516] +- kvm-apic-disable-reentrancy-detection-for-apic-msi.patch [RHEL-516] +- kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch [RHEL-516] +- kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch [RHEL-516] +- kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch [RHEL-516] +- kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch [RHEL-516] +- kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch [RHEL-516] +- kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch [bz#2189423] +- kvm-multifd-Fix-the-number-of-channels-ready.patch [bz#2196289] +- kvm-util-async-teardown-wire-up-query-command-line-optio.patch [bz#2168500] +- kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch [bz#2168500] +- Resolves: bz#2186725 + (Qemu hang when commit during fio running(iothread enable)) +- Resolves: RHEL-516 + (CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9]) +- Resolves: bz#2189423 + (Failed to migrate VM from rhel 9.3 to rhel 9.2) +- Resolves: bz#2196289 + (Fix number of ready channels on multifd) +- Resolves: bz#2168500 + ([IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part) + +* Mon May 22 2023 Miroslav Rezanina - 8.0.0-4 +- 
kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch [bz#2058982] +- kvm-util-mmap-alloc-qemu_fd_getfs.patch [bz#2057267] +- kvm-vl.c-Create-late-backends-before-migration-object.patch [bz#2057267] +- kvm-migration-postcopy-Detect-file-system-on-dest-host.patch [bz#2057267] +- kvm-migration-mark-mixed-functions-that-can-suspend.patch [bz#2057267] +- kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch [bz#2057267] +- kvm-migration-remove-extra-whitespace-character-for-code.patch [bz#2057267] +- kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch [bz#2057267] +- kvm-migration-Update-atomic-stats-out-of-the-mutex.patch [bz#2057267] +- kvm-migration-Make-multifd_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch [bz#2057267] +- kvm-migration-Make-precopy_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-downtime_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-dirty_sync_count-atomic.patch [bz#2057267] +- kvm-migration-Make-postcopy_requests-atomic.patch [bz#2057267] +- kvm-migration-Rename-duplicate-to-zero_pages.patch [bz#2057267] +- kvm-migration-Rename-normal-to-normal_pages.patch [bz#2057267] +- kvm-migration-rename-enabled_capabilities-to-capabilitie.patch [bz#2057267] +- kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch [bz#2057267] +- kvm-migration-move-migration_global_dump-to-migration-hm.patch [bz#2057267] +- kvm-spice-move-client_migrate_info-command-to-ui.patch [bz#2057267] +- kvm-migration-Create-migrate_cap_set.patch [bz#2057267] +- kvm-migration-Create-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_colo_enabled-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_compression-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_events-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_multifd-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch [bz#2057267] +- 
kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_block-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_return-to-options.c.patch [bz#2057267] +- kvm-migration-Create-migrate_rdma_pin_all-function.patch [bz#2057267] +- kvm-migration-Move-migrate_caps_check-to-options.c.patch [bz#2057267] +- kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch [bz#2057267] +- kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch [bz#2057267] +- kvm-migration-Move-migrate_cap_set-to-options.c.patch [bz#2057267] +- kvm-migration-Move-parameters-functions-to-option.c.patch [bz#2057267] +- kvm-migration-Use-migrate_max_postcopy_bandwidth.patch [bz#2057267] +- kvm-migration-Move-migrate_use_block_incremental-to-opti.patch [bz#2057267] +- kvm-migration-Create-migrate_throttle_trigger_threshold.patch [bz#2057267] +- kvm-migration-Create-migrate_checkpoint_delay.patch [bz#2057267] +- kvm-migration-Create-migrate_max_cpu_throttle.patch [bz#2057267] +- kvm-migration-Move-migrate_announce_params-to-option.c.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_increment-func.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch [bz#2057267] +- kvm-migration-Move-migrate_postcopy-to-options.c.patch [bz#2057267] +- kvm-migration-Create-migrate_max_bandwidth-function.patch [bz#2057267] +- kvm-migration-Move-migrate_use_tls-to-options.c.patch [bz#2057267] +- kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch [bz#2057267] +- kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch [bz#2057267] +- kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch [bz#2185688] +- kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch [bz#2185688] +- kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch [bz#2185688] +- 
kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch [bz#2185688] +- kvm-Enable-Linux-io_uring.patch [bz#1947230] +- Resolves: bz#2058982 + (Qemu core dump if cut off nfs storage during migration) +- Resolves: bz#2057267 + (Migration with postcopy fail when vm set with shared memory) +- Resolves: bz#2185688 + ([qemu-kvm] no response with QMP command block_resize) +- Resolves: bz#1947230 + (Enable QEMU support for io_uring in RHEL9) + +* Mon May 15 2023 Miroslav Rezanina - 8.0.0-3 +- kvm-migration-Handle-block-device-inactivation-failures-.patch [bz#2058982] +- kvm-migration-Minor-control-flow-simplification.patch [bz#2058982] +- Resolves: bz#2058982 + (Qemu core dump if cut off nfs storage during migration) + +* Mon May 08 2023 Miroslav Rezanina - 8.0.0-2 +- kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch [bz#2087047] +- kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch [bz#1934134] +- kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch [bz#1934134] +- Resolves: bz#2087047 + (Disk detach is unsuccessful while the guest is still booting) +- Resolves: bz#1934134 + (ACPI table limits warning when booting guest with 512 VCPUs) + +* Thu Apr 20 2023 Miroslav Rezanina - 8.0.0-1 +- Rebase to QEMU 8.0.0 +- Resolves: bz#2180898 + (Rebase to QEMU 8.0.0 for RHEL 9.3.0) * Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-14 - Rebuild for 9.2 release