From 2c6097766c8e88d5a4f11007332adb36cd18a6a8 Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Wed, 8 Nov 2023 16:48:28 +0300 Subject: [PATCH 1/5] import qemu-kvm-8.0.0-16.el9_3 --- .gitignore | 2 +- .qemu-kvm.metadata | 2 +- SOURCES/0004-Initial-redhat-build.patch | 40 +- ...0005-Enable-disable-devices-for-RHEL.patch | 85 +- ...Machine-type-related-general-changes.patch | 78 +- SOURCES/0007-Add-aarch64-machine-types.patch | 177 +-- SOURCES/0008-Add-ppc64-machine-types.patch | 52 +- SOURCES/0009-Add-s390x-machine-types.patch | 68 +- SOURCES/0010-Add-x86_64-machine-types.patch | 141 +-- SOURCES/0011-Enable-make-check.patch | 136 ++- ...mber-of-devices-that-can-be-assigned.patch | 10 +- ...Add-support-statement-to-help-output.patch | 6 +- ...documentation-instead-of-qemu-system.patch | 8 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 60 - ...n-warning-when-opening-v2-images-rw.patch} | 10 +- ...add-usb-support-to-guest-get-fsinfo.patch} | 16 +- ...0017-Add-RHEL-9.2.0-compat-structure.patch | 110 ++ ...ompat-bits-for-RHEL-9.1-machine-type.patch | 26 - ...c-Update-x86-machine-type-compatibil.patch | 76 ++ .../0019-Disable-unwanted-new-devices.patch | 83 ++ ...90x-machine-type-compatibility-for-Q.patch | 47 - ...ch64-add-rhel9.2.0-virt-machine-type.patch | 43 - ...dd-new-rhel-9.2.0-s390x-machine-type.patch | 62 - .../0022-x86-rhel-9.2.0-machine-type.patch | 75 -- ...kvm-KVM-keep-track-of-running-ioctls.patch | 82 -- ...r-Introduce-nested-event-loop-in-vho.patch | 140 --- ...r-Monitor-slave-channel-in-vhost_use.patch | 143 --- ...el-introduce-accelerator-blocker-API.patch | 348 ------ ...PUJumpCache-in-tb_jmp_cache_clear_pa.patch | 58 - ...-allow-repeating-hot-unplug-requests.patch | 17 +- ...ce-between-epoll-upgrade-and-aio_set.patch | 90 -- ...aio-wait-switch-to-smp_mb__after_rmw.patch | 50 - ...le-reentrancy-detection-for-apic-msi.patch | 55 + ...ional-reentrancy-guard-to-the-BH-API.patch | 231 ++++ ...-use-after-free-on-re-entrancy-guard.patch | 70 
++ ...sage-of-barriers-in-the-polling-case.patch | 66 -- ...documentation-of-the-memory-barriers.patch | 111 -- ...-disable-reentrancy-detection-for-io.patch | 57 + ...block-Call-drain-callbacks-only-once.patch | 250 ---- ...se-padded-I-O-vecs-exceeding-IOV_MAX.patch | 354 ++++++ ...-no_coroutine_fns-in-qmp_block_resiz.patch | 56 + ...-t-poll-in-bdrv_replace_child_noperm.patch | 298 ----- ...subtree-drains-in-bdrv_drop_intermed.patch | 54 - ...Drain-individual-nodes-during-reopen.patch | 157 --- ...f-coroutine-in-bdrv_do_drained_begin.patch | 96 -- ...-locking-for-bdrv_reopen_queue_child.patch | 67 -- ...ix-pad_request-s-request-restriction.patch | 73 ++ ...rove-empty-format-specific-info-dump.patch | 132 --- .../kvm-block-Inline-bdrv_drain_invoke.patch | 81 -- ...kvm-block-Remove-drained_end_counter.patch | 433 ------- ...ore_bds_parents-parameter-from-drain.patch | 274 ----- ...l-parameter-from-bdrv_parent_drained.patch | 106 -- SOURCES/kvm-block-Remove-subtree-drains.patch | 896 -------------- ...rv_drained_begin-end-to-non-coroutin.patch | 302 ----- ...Split-BlockNodeInfo-off-of-ImageInfo.patch | 246 ---- ...o_unref-for-calls-in-coroutine-conte.patch | 386 +++++++ ...o-do-not-use-open-flags-in-qemu_open.patch | 74 ++ ...-blkio-enable-the-completion-eventfd.patch | 54 + ...-back-on-using-path-when-fd-setting-.patch | 67 ++ ...ck-blkio-fix-module_block.py-parsing.patch | 205 ++++ ...-blkio_connect-in-the-drivers-functi.patch | 151 +++ ...y-blkio_connect-if-it-fails-using-fd.patch | 85 ++ ...blkio_set_int-fd-to-check-fd-support.patch | 49 + ...qemu_open-to-support-fd-passing-for-.patch | 108 ++ ...t-assert_bdrv_graph_readable-by-defa.patch | 121 ++ ...drop-bdrv_remove_filter_or_cow_child.patch | 70 -- ...ck-file-Add-file-specific-image-info.patch | 145 --- ...d-indentation-to-bdrv_node_info_dump.patch | 206 ---- ...-block-qapi-Introduce-BlockGraphInfo.patch | 155 --- ...pi-Let-bdrv_query_image_info-recurse.patch | 197 ---- 
...y-hold-the-new-AioContext-of-bs_top-.patch | 99 -- ...m-block-vmdk-Change-extent-info-type.patch | 140 --- ...ch-add-qemu_bh_new-aio_bh_new-checks.patch | 55 + ...ent-dma_blk_cb-vs-dma_aio_cancel-rac.patch | 127 -- SOURCES/kvm-edu-add-smp_mb__after_rmw.patch | 61 - ...m-graph-lock-Disable-locking-for-now.patch | 153 +++ ...i-blobs-as-resizable-on-RHEL-pc-mach.patch | 40 + ...rning-on-acpi-table-size-to-pc-machi.patch | 101 ++ ...idate-cluster-and-NUMA-node-boundary.patch | 60 + ...mu-Handle-big-endian-hosts-correctly.patch | 166 +++ ...rm-virt-Add-compact-highmem-property.patch | 169 --- ...properties-to-disable-high-memory-re.patch | 179 --- ...le-compat-high-memory-region-address.patch | 51 - ...ove-high-memory-region-address-assig.patch | 112 -- ...oduce-variable-region_base-in-virt_s.patch | 82 -- ...oduce-virt_get_high_memmap_enabled-h.patch | 95 -- ...ntroduce-virt_set_high_memmap-helper.patch | 130 --- ...me-variable-size-to-region_size-in-v.patch | 83 -- ...date-cluster-and-NUMA-node-boundary-.patch | 41 + ...CI_ERR_UNCOR_MASK-reg-for-machine-ty.patch | 44 + ...CI_ERR_UNCOR_MASK-register-for-machi.patch | 118 ++ ...qemu_bh_new-calls-with-qemu_bh_new_g.patch | 470 ++++++++ ...5a-Fix-reentrancy-issues-in-the-LSI-.patch | 141 +++ ...fix-field-corruption-in-type-4-table.patch | 59 - ...i-quirks-Sanitize-capability-pointer.patch | 76 ++ ...ks-Support-alternate-offset-for-GPUD.patch | 110 ++ ...Fix-potential-OOB-access-in-virtio_i.patch | 62 + ...how-the-EBX-register-of-CPUID-0x8000.patch | 52 + ...checks-and-information-related-to-re.patch | 77 ++ ...-fail-DEVIOTLB_UNMAP-without-dt-mode.patch | 64 - ...port-for-MSG_PEEK-for-socket-channel.patch | 386 ------- ...ch-if-TLS-channel-is-closed-during-h.patch | 23 +- ...-106-214-308-Read-only-one-size-line.patch | 99 -- ...otests-Filter-child-node-information.patch | 171 --- ...ommit-with-iothreads-and-ongoing-I-O.patch | 144 +++ ...sizing-image-attached-to-an-iothread.patch | 132 +++ 
...rnative-CPU-type-that-is-not-depreca.patch | 44 + .../kvm-iotests-iov-padding-New-test.patch | 186 +++ SOURCES/kvm-kvm-Atomic-memslot-updates.patch | 286 ----- ...pu-stats-fd-to-avoid-vcpu-interrupti.patch | 45 +- .../kvm-linux-headers-Update-to-v6.1.patch | 577 --------- ...loongarch_ipi_iocsr-re-entrnacy-safe.patch | 53 + ...le-reentrancy-detection-for-MMIO-reg.patch | 70 ++ ...le-reentrancy-detection-for-script-R.patch | 58 + ...-memory-prevent-dma-reentracy-issues.patch | 150 +++ ...checks-prior-to-unsetting-engaged_in.patch | 67 ++ ...ave_prepare-handler-to-struct-SaveVM.patch | 186 +++ ...gration-prefix-to-functions-in-targe.patch | 139 +++ ...ration-Add-switchover-ack-capability.patch | 162 +++ ...postcopy_ram_supported_by_host-to-re.patch | 308 +++++ ...t-disk-reactivation-in-more-failure-.patch | 111 ++ ...kvm-migration-Create-migrate_cap_set.patch | 93 ++ ...tion-Create-migrate_checkpoint_delay.patch | 84 ++ ...-migrate_cpu_throttle_increment-func.patch | 75 ++ ...-migrate_cpu_throttle_initial-to-opt.patch | 75 ++ ...-migrate_cpu_throttle_tailslow-funct.patch | 78 ++ ...reate-migrate_max_bandwidth-function.patch | 232 ++++ ...tion-Create-migrate_max_cpu_throttle.patch | 88 ++ ...Create-migrate_rdma_pin_all-function.patch | 95 ++ ...e-migrate_throttle_trigger_threshold.patch | 75 ++ SOURCES/kvm-migration-Create-options.c.patch | 524 +++++++++ ...ion-Enable-switchover-ack-capability.patch | 56 + ...-block-device-inactivation-failures-.patch | 116 ++ ...ation-Implement-switchover-ack-logic.patch | 339 ++++++ ...ll-functions-check-have-the-same-for.patch | 431 +++++++ ...gration-Make-dirty_sync_count-atomic.patch | 105 ++ ...e-dirty_sync_missed_zero_copy-atomic.patch | 92 ++ ...migration-Make-downtime_bytes-atomic.patch | 68 ++ ...-migration-Make-multifd_bytes-atomic.patch | 99 ++ ...ration-Make-postcopy_requests-atomic.patch | 69 ++ ...-migration-Make-precopy_bytes-atomic.patch | 68 ++ ...ram_counters-and-ram_atomic_counters.patch | 270 +++++ 
...on-Minor-control-flow-simplification.patch | 52 + ...-migrate_announce_params-to-option.c.patch | 90 ++ ...on-Move-migrate_cap_set-to-options.c.patch | 110 ++ ...Move-migrate_caps_check-to-options.c.patch | 458 ++++++++ ...ve-migrate_colo_enabled-to-options.c.patch | 136 +++ ...n-Move-migrate_postcopy-to-options.c.patch | 98 ++ ...-Move-migrate_use_block-to-options.c.patch | 134 +++ ...igrate_use_block_incremental-to-opti.patch | 121 ++ ...migrate_use_compression-to-options.c.patch | 183 +++ ...Move-migrate_use_events-to-options.c.patch | 120 ++ ...ove-migrate_use_multifd-to-options.c.patch | 247 ++++ ...Move-migrate_use_return-to-options.c.patch | 138 +++ ...on-Move-migrate_use_tls-to-options.c.patch | 134 +++ ...Move-migrate_use_xbzrle-to-options.c.patch | 156 +++ ...igrate_use_zero_copy_send-to-options.patch | 167 +++ ...ve-migration_properties-to-options.c.patch | 409 +++++++ ...more-initializations-to-migrate_init.patch | 94 ++ ...ove-parameters-functions-to-option.c.patch | 317 +++++ ...mp_migrate_set_capabilities-to-optio.patch | 100 ++ ...mp_migrate_set_parameters-to-options.patch | 943 +++++++++++++++ ...mp_query_migrate_capabilities-to-opt.patch | 100 ++ ...igrate_caps_check-the-old-and-new-ca.patch | 226 ++++ ...ation-Rename-duplicate-to-zero_pages.patch | 109 ++ ...ration-Rename-normal-to-normal_pages.patch | 109 ++ ...Update-atomic-stats-out-of-the-mutex.patch | 52 + ...n-Use-migrate_max_postcopy_bandwidth.patch | 40 + ...magic-value-for-deciding-the-mapping.patch | 330 ------ ...ark-mixed-functions-that-can-suspend.patch | 153 +++ ...igration_global_dump-to-migration-hm.patch | 121 ++ ...copy-Detect-file-system-on-dest-host.patch | 117 ++ ...-extra-whitespace-character-for-code.patch | 44 + ...-enabled_capabilities-to-capabilitie.patch | 329 ++++++ ...roperty-multifd-flush-after-each-sec.patch | 127 ++ ...ifd-Fix-the-number-of-channels-ready.patch | 58 + ...flush-once-each-full-round-of-memory.patch | 166 +++ 
...Protect-multifd_send_sync_main-calls.patch | 78 ++ ...rained_poll-to-wake-coroutine-in-rig.patch | 159 +++ ...fd-type-checking-to-its-own-function.patch | 78 ++ ...t-prepare-to-cleanup-net_init_socket.patch | 60 + ...vm-net-socket-remove-net_init_socket.patch | 102 ++ ...-new-option-to-automatically-reconne.patch | 325 ------ ...uster-and-NUMA-node-boundary-if-requ.patch | 145 +++ ...-Don-t-use-__bss_start-with-the-larl.patch | 78 ++ ...-s390-ccw-Fix-indentation-in-start.S.patch | 218 ++++ ...-Makefile-Use-z-noexecstack-to-silen.patch | 50 + ...-Provide-space-for-initial-stack-fra.patch | 59 + ...tplug-detect-state-register-to-cmask.patch | 87 ++ ...m-physmem-add-missing-memory-barrier.patch | 55 - ...tcopy-ram-do-not-use-qatomic_mb_read.patch | 42 + ...eature-for-BlockdevOptionsVirtioBlkV.patch | 79 ++ ...ange-the-reduced-phys-bits-value-fro.patch | 50 + ...qatomic-add-smp_mb__before-after_rmw.patch | 177 --- ...tical-corruption-in-store_bitmap-err.patch | 67 -- ...n-t-yield-in-bdrv_qed_co_drain_begin.patch | 84 -- ...coroutine-lock-add-smp_mb__after_rmw.patch | 75 -- ...ge-info-key-names-for-protocol-nodes.patch | 197 ---- ...-qemu-img-Let-info-print-block-graph.patch | 261 ----- SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch | 241 ---- ...Report-errors-while-closing-the-imag.patch | 70 -- ...Report-errors-while-closing-the-imag.patch | 67 -- ...t-qemu-img-bitmap-commit-exit-code-o.patch | 166 --- ...Update-the-reduced-phys-bits-documen.patch | 60 + ...posix-cleanup-fix-document-QemuEvent.patch | 146 --- ...win32-cleanup-fix-document-QemuEvent.patch | 162 --- ...sable-reentrancy-detection-for-iomem.patch | 54 + ...dhat-fix-virt-rhel9.2.0-compat-props.patch | 43 - ...-the-device-request-notifier-interfa.patch | 220 ++++ ...-s390x-pci-coalesce-unmap-operations.patch | 125 -- ...ISM-passthrough-devices-on-shutdown-.patch | 147 --- ...-DMA-aperture-to-be-bound-by-vfio-DM.patch | 91 -- ...rious-warning-with-asynchronous-tear.patch | 129 +++ 
...390x-pv-Implement-a-CGS-check-helper.patch | 109 -- ...o-ccw-Activate-zPCI-features-on-s390.patch | 70 -- ...si-cleanup-scsi_clear_unit_attention.patch | 81 ++ ...attention-only-for-REPORT-LUNS-comma.patch | 110 ++ ...-attention-when-creating-the-request.patch | 132 +++ ...otect-req-aiocb-with-AioContext-lock.patch | 176 --- ...ve-client_migrate_info-command-to-ui.patch | 248 ++++ ...ubtree-drain-with-a-single-node-drai.patch | 159 --- ...EPYC-Genoa-model-to-support-Zen-4-pr.patch | 203 ++++ ...VNMI-and-automatic-IBRS-feature-bits.patch | 105 ++ ...a-couple-of-feature-bits-in-8000_000.patch | 94 ++ ...feature-bits-for-CPUID_Fn80000021_EA.patch | 126 ++ ...missing-feature-bits-in-EPYC-Milan-m.patch | 152 +++ ...new-EPYC-CPU-versions-with-updated-c.patch | 192 +++ ...-32-bit-AD-CO-X-insns-in-64-bit-mode.patch | 144 --- ...vm-target-i386-Fix-BEXTR-instruction.patch | 110 -- ...kvm-target-i386-Fix-BZHI-instruction.patch | 77 -- ...i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch | 60 - ...386-add-support-for-FB_CLEAR-feature.patch | 71 ++ ...86-add-support-for-FLUSH_L1D-feature.patch | 70 ++ ...w-versioned-CPUs-to-specify-new-cach.patch | 116 ++ ...arget-i386-fix-ADOX-followed-by-ADCX.patch | 205 ---- ...operand-size-of-unary-SSE-operations.patch | 77 -- ...h_dump-Fix-memory-corruption-in-s390.patch | 50 - ...Don-t-yield-in-.bdrv_co_drained_begi.patch | 153 --- ...etdev-test-stream-and-dgram-backends.patch | 505 -------- ...Introduce-and-use-reg_t-consistently.patch | 299 ----- ...our-channel-order-for-PNG-screenshot.patch | 88 ++ ...own-wire-up-query-command-line-optio.patch | 180 +++ .../kvm-util-iov-Make-qiov_slice-public.patch | 97 ++ ...-iov-Remove-qemu_iovec_init_extended.patch | 156 +++ .../kvm-util-mmap-alloc-qemu_fd_getfs.patch | 95 ++ .../kvm-util-userfaultfd-Add-uffd_open.patch | 169 --- ...-userfaultfd-Support-dev-userfaultfd.patch | 94 -- ...il-vfio-helpers-Use-g_file_read_link.patch | 82 ++ ...arameter-to-vhost_vdpa_dma_map-unmap.patch | 221 ---- 
...m-vdpa-add-shadow_data-to-vhost_vdpa.patch | 94 -- ...dd-vhost_vdpa_net_valid_svq_features.patch | 76 -- ...a-allocate-SVQ-array-unconditionally.patch | 50 - ...ys-start-CVQ-in-SVQ-mode-if-possible.patch | 193 ---- ...k-migration-if-device-has-cvq-and-x-.patch | 61 + ...le-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch | 44 - ...pa-export-vhost_vdpa_set_vring_ready.patch | 105 ++ ...HOST_BACKEND_F_IOTLB_ASID-flag-check.patch | 48 - ...IO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch | 59 - ...pa-map-shadow-vrings-with-MAP_SHARED.patch | 131 --- ...olation-check-to-net_init_vhost_vdpa.patch | 286 +++++ ...move-SVQ-vring-features-check-to-net.patch | 118 -- ...t_vdpa_set_vring_ready-to-the-caller.patch | 134 +++ ...ova-tree-creation-from-init-to-start.patch | 268 ----- ...dpa-remove-net-cvq-migration-blocker.patch | 51 + ...t_vdpa_net_load-to-vhost_vdpa_net_cv.patch | 49 + ...st_vdpa_net_cvq_cmd_page_len-functio.patch | 84 -- ...vm-vdpa-request-iova_range-only-once.patch | 145 --- ...o-in-vhost_vdpa_get_vring_group-erro.patch | 67 ++ ...vdpa-stop-all-svq-on-device-deletion.patch | 84 -- ...re-x-svq-parameter-in-VhostVDPAState.patch | 62 - ...irst-queue-SVQ-state-for-CVQ-default.patch | 46 + ...w_vqs_enabled-in-vhost_vdpa_svqs_sta.patch | 58 - ...inter-dereference-bug-in-vfio_bars_f.patch | 72 ++ ...mplement-a-common-device-info-helper.patch | 196 ++++ ...-Add-VFIO-migration-pre-copy-support.patch | 438 +++++++ ...dd-support-for-switchover-ack-capabi.patch | 192 +++ ...lock-VFIO-migration-with-postcopy-mi.patch | 90 ++ ...hange-vIOMMU-blocker-from-global-to-.patch | 171 +++ ...ree-resources-when-vfio_migration_re.patch | 145 +++ ...Make-VFIO-migration-non-experimental.patch | 283 +++++ ...efactor-vfio_save_block-to-return-sa.patch | 102 ++ ...n-Remove-print-of-Migration-disabled.patch | 56 + ...ion-Reset-bytes_transferred-properly.patch | 165 +++ ...eturn-bool-type-for-vfio_migration_r.patch | 125 ++ ...kip-log_sync-during-migration-SETUP-.patch | 68 ++ 
...tore-VFIO-migration-flags-in-VFIOMig.patch | 70 ++ ...io_prepare_kvm_msi_virq_batch-in-MSI.patch | 67 ++ ...able-INTx-in-vfio_realize-error-path.patch | 54 + ...o-pci-Fix-a-segfault-in-vfio_realize.patch | 67 ++ ...-vfio-pci-Fix-a-use-after-free-issue.patch | 56 + ...aked-timer-in-vfio_realize-error-pat.patch | 55 + ...-pci-Static-Resizable-BAR-capability.patch | 141 +++ ...vm-vfio-pci-add-support-for-VF-token.patch | 104 ++ ...-add-support-for-configure-interrupt.patch | 185 --- ...VQ-device-file-descriptors-at-device.patch | 171 --- ...host_dev_enable_notifiers-error-case.patch | 138 +++ ...ty-bitmap-syncing-when-vIOMMU-is-ena.patch | 157 --- ...e-new-VhostOps-vhost_set_config_call.patch | 56 - ...ove-iova_tree-set-to-vhost_svq_start.patch | 122 -- ...SVQ-device-call-handler-at-SVQ-start.patch | 73 -- ...dpa-add-support-for-config-interrupt.patch | 73 -- ...t-cleanup-the-vdpa-vhost-net-structu.patch | 22 +- ...a-mute-unaligned-memory-error-report.patch | 86 ++ ...-add-support-for-configure-interrupt.patch | 115 -- ...ntroduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch | 262 ----- ...-64kB-host-page-size-VFIO-device-ass.patch | 151 +++ ...ork-the-traces-in-virtio_iommu_set_p.patch | 83 ++ ...ndardize-granule-extraction-and-form.patch | 88 ++ ...-add-support-for-configure-interrupt.patch | 80 -- ...-add-support-for-configure-interrupt.patch | 115 -- ...ctly-report-maximum-tx_queue_size-va.patch | 92 ++ ...-add-support-for-configure-interrupt.patch | 274 ----- ...uple-notifier-from-interrupt-process.patch | 272 ----- ...ple-the-single-vector-from-the-inter.patch | 212 ---- ...pci-fix-migration-compat-for-vectors.patch | 53 - ...ix-transitional-migration-compat-for.patch | 47 - ...t-SCSI-devices-from-main-loop-thread.patch | 325 ------ ...y-virtio_net_get_config-to-early-ret.patch | 74 -- ...VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch | 46 - ...ate-backends-before-migration-object.patch | 58 + SPECS/qemu-kvm.spec | 1027 ++++++++++++----- 321 files changed, 24768 
insertions(+), 18955 deletions(-) delete mode 100644 SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename SOURCES/{0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch => 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch} (92%) rename SOURCES/{kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch => 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch} (81%) create mode 100644 SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch delete mode 100644 SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch create mode 100644 SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch create mode 100644 SOURCES/0019-Disable-unwanted-new-devices.patch delete mode 100644 SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch delete mode 100644 SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch delete mode 100644 SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch delete mode 100644 SOURCES/0022-x86-rhel-9.2.0-machine-type.patch delete mode 100644 SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch delete mode 100644 SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch delete mode 100644 SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch delete mode 100644 SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch delete mode 100644 SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch delete mode 100644 SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch delete mode 100644 SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch create mode 100644 SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch create mode 100644 SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch create mode 100644 SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch delete mode 100644 SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch delete mode 
100644 SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch create mode 100644 SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch delete mode 100644 SOURCES/kvm-block-Call-drain-callbacks-only-once.patch create mode 100644 SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch create mode 100644 SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch delete mode 100644 SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch delete mode 100644 SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch delete mode 100644 SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch delete mode 100644 SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch delete mode 100644 SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch create mode 100644 SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch delete mode 100644 SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch delete mode 100644 SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch delete mode 100644 SOURCES/kvm-block-Remove-drained_end_counter.patch delete mode 100644 SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch delete mode 100644 SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch delete mode 100644 SOURCES/kvm-block-Remove-subtree-drains.patch delete mode 100644 SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch delete mode 100644 SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch create mode 100644 SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch create mode 100644 SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch create mode 100644 SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch create mode 100644 SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch create mode 100644 SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch create mode 100644 
SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch create mode 100644 SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch create mode 100644 SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch create mode 100644 SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch create mode 100644 SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch delete mode 100644 SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch delete mode 100644 SOURCES/kvm-block-file-Add-file-specific-image-info.patch delete mode 100644 SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch delete mode 100644 SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch delete mode 100644 SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch delete mode 100644 SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch delete mode 100644 SOURCES/kvm-block-vmdk-Change-extent-info-type.patch create mode 100644 SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch delete mode 100644 SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch delete mode 100644 SOURCES/kvm-edu-add-smp_mb__after_rmw.patch create mode 100644 SOURCES/kvm-graph-lock-Disable-locking-for-now.patch create mode 100644 SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch create mode 100644 SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch create mode 100644 SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch create mode 100644 SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch 
delete mode 100644 SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch create mode 100644 SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch create mode 100644 SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch create mode 100644 SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch create mode 100644 SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch delete mode 100644 SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch create mode 100644 SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch create mode 100644 SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch create mode 100644 SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch create mode 100644 SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch create mode 100644 SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch delete mode 100644 SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch delete mode 100644 SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch delete mode 100644 SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch delete mode 100644 SOURCES/kvm-iotests-Filter-child-node-information.patch create mode 100644 SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch create mode 100644 SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch create mode 100644 SOURCES/kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch create mode 100644 SOURCES/kvm-iotests-iov-padding-New-test.patch delete 
mode 100644 SOURCES/kvm-kvm-Atomic-memslot-updates.patch delete mode 100644 SOURCES/kvm-linux-headers-Update-to-v6.1.patch create mode 100644 SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch create mode 100644 SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch create mode 100644 SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch create mode 100644 SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch create mode 100644 SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch create mode 100644 SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch create mode 100644 SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch create mode 100644 SOURCES/kvm-migration-Add-switchover-ack-capability.patch create mode 100644 SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch create mode 100644 SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_cap_set.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch create mode 100644 SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch create mode 100644 SOURCES/kvm-migration-Create-options.c.patch create mode 100644 SOURCES/kvm-migration-Enable-switchover-ack-capability.patch create mode 100644 SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch create mode 
100644 SOURCES/kvm-migration-Implement-switchover-ack-logic.patch create mode 100644 SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch create mode 100644 SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch create mode 100644 SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch create mode 100644 SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch create mode 100644 SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch create mode 100644 SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch create mode 100644 SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch create mode 100644 SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch create mode 100644 SOURCES/kvm-migration-Minor-control-flow-simplification.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch create mode 100644 
SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch create mode 100644 SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch create mode 100644 SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch create mode 100644 SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch create mode 100644 SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch create mode 100644 SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch create mode 100644 SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch create mode 100644 SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch create mode 100644 SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch create mode 100644 SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch create mode 100644 SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch delete mode 100644 SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch create mode 100644 SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch create mode 100644 SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch create mode 100644 SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch create mode 100644 SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch create mode 100644 SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch create mode 100644 SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch create mode 100644 SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch create mode 100644 SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch create mode 100644 SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch create mode 100644 SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch create mode 100644 SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch create mode 100644 
SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch create mode 100644 SOURCES/kvm-net-socket-remove-net_init_socket.patch delete mode 100644 SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch create mode 100644 SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch create mode 100644 SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch delete mode 100644 SOURCES/kvm-physmem-add-missing-memory-barrier.patch create mode 100644 SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch create mode 100644 SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch create mode 100644 SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch delete mode 100644 SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch delete mode 100644 SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch delete mode 100644 SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch delete mode 100644 SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch delete mode 100644 SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch delete mode 100644 SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch delete mode 100644 SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch delete mode 100644 SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch delete mode 100644 SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch delete mode 100644 SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch create mode 100644 SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch delete mode 
100644 SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch delete mode 100644 SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch create mode 100644 SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch delete mode 100644 SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch create mode 100644 SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch delete mode 100644 SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch delete mode 100644 SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch delete mode 100644 SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch create mode 100644 SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch delete mode 100644 SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch delete mode 100644 SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch create mode 100644 SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch create mode 100644 SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch create mode 100644 SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch delete mode 100644 SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch create mode 100644 SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch delete mode 100644 SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch create mode 100644 SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch create mode 100644 SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch create mode 100644 SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch create mode 100644 SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch create mode 100644 SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch create mode 100644 SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch delete mode 100644 
SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch delete mode 100644 SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch delete mode 100644 SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch delete mode 100644 SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch create mode 100644 SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch create mode 100644 SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch create mode 100644 SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch delete mode 100644 SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch delete mode 100644 SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch delete mode 100644 SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch delete mode 100644 SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch delete mode 100644 SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch delete mode 100644 SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch create mode 100644 SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch create mode 100644 SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch create mode 100644 SOURCES/kvm-util-iov-Make-qiov_slice-public.patch create mode 100644 SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch create mode 100644 SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch delete mode 100644 SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch delete mode 100644 SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch create mode 100644 SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch delete mode 100644 SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch delete mode 100644 SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch delete mode 100644 SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch delete mode 100644 
SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch delete mode 100644 SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch create mode 100644 SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch delete mode 100644 SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch create mode 100644 SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch delete mode 100644 SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch delete mode 100644 SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch delete mode 100644 SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch create mode 100644 SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch delete mode 100644 SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch create mode 100644 SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch delete mode 100644 SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch create mode 100644 SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch create mode 100644 SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch delete mode 100644 SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch delete mode 100644 SOURCES/kvm-vdpa-request-iova_range-only-once.patch create mode 100644 SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch delete mode 100644 SOURCES/kvm-vdpa-stop-all-svq-on-device-deletion.patch delete mode 100644 SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch create mode 100644 SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch delete mode 100644 SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch create mode 100644 SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch create mode 100644 SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch create mode 100644 SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch 
create mode 100644 SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch create mode 100644 SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch create mode 100644 SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch create mode 100644 SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch create mode 100644 SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch create mode 100644 SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch create mode 100644 SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch create mode 100644 SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch create mode 100644 SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch create mode 100644 SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch create mode 100644 SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch create mode 100644 SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch create mode 100644 SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch create mode 100644 SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch create mode 100644 SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch create mode 100644 SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch create mode 100644 SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch create mode 100644 SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch delete mode 100644 SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch delete mode 100644 SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch create mode 100644 SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch delete mode 100644 SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch delete mode 100644 SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch delete mode 100644 
SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch delete mode 100644 SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch delete mode 100644 SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch create mode 100644 SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch delete mode 100644 SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch delete mode 100644 SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch create mode 100644 SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch create mode 100644 SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch create mode 100644 SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch delete mode 100644 SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch delete mode 100644 SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch create mode 100644 SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch delete mode 100644 SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch delete mode 100644 SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch delete mode 100644 SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch delete mode 100644 SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch delete mode 100644 SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch delete mode 100644 SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch delete mode 100644 SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch delete mode 100644 SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch create mode 100644 SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch diff --git a/.gitignore b/.gitignore index 459c79b..7dc73be 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/qemu-7.2.0.tar.xz +SOURCES/qemu-8.0.0.tar.xz diff --git a/.qemu-kvm.metadata 
b/.qemu-kvm.metadata index ed38614..a158c44 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -634a3e4b381cbf13085eb1568accb85cbd9d89c4 SOURCES/qemu-7.2.0.tar.xz +17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz diff --git a/SOURCES/0004-Initial-redhat-build.patch b/SOURCES/0004-Initial-redhat-build.patch index 0f9cc55..612633e 100644 --- a/SOURCES/0004-Initial-redhat-build.patch +++ b/SOURCES/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From ccc4a5bdc8c2f27678312364a7c12aeafd009bb6 Mon Sep 17 00:00:00 2001 +From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. -This rebase is based on qemu-kvm-7.1.0-7.el9 +This rebase is based on qemu-kvm-7.2.0-14.el9 Signed-off-by: Miroslav Rezanina -- @@ -66,6 +66,16 @@ Rebase changes (7.2.0): - Fix SRPM name generation to work on Fedora 37 - Switch back to system meson +Rebase changes (8.0.0-rc1): +- use enable-dtrace-backands instead of enable-dtrace-backend +- Removed qemu virtiofsd bits + +Rebase changes (8.0.0-rc2): +- test/check-block.sh removed (upstream) + +Rebase changes (8.0.0-rc3): +- Add new --disable-* options for configure + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -162,16 +172,18 @@ Merged patches (7.2.0 rc4): - 8c6834feb6 Remove opengl display device subpackages (C9S MR 124) - 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124) -Signed-off-by: Miroslav Rezanina +Merged patches (8.0.0-rc1): +- 7754f6ba78 Minor packaging fixes +- 401af56187 spec: Disable VDUSE -fix +Signed-off-by: Miroslav Rezanina --- .distro/Makefile | 100 + .distro/Makefile.common | 41 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - 
.distro/qemu-kvm.spec.template | 4315 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + .distro/scripts/process-patches.sh | 4 + @@ -180,9 +192,8 @@ fix scripts/qemu-guest-agent/fsfreeze-hook | 2 +- scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + - tests/check-block.sh | 2 + ui/vnc-auth-sasl.c | 2 +- - 16 files changed, 4573 insertions(+), 4 deletions(-) + 15 files changed, 4784 insertions(+), 4 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests @@ -271,19 +282,6 @@ index 0000000000..c04abf9449 +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -diff --git a/tests/check-block.sh b/tests/check-block.sh -index 5de2c1ba0b..6af743f441 100755 ---- a/tests/check-block.sh -+++ b/tests/check-block.sh -@@ -22,6 +22,8 @@ if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then - skip "No qemu-system binary available ==> Not running the qemu-iotests." 
- fi - -+exit 0 -+ - cd tests/qemu-iotests - - # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c index 47fdae5b21..2a950caa2a 100644 --- a/ui/vnc-auth-sasl.c @@ -298,5 +296,5 @@ index 47fdae5b21..2a950caa2a 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.31.1 +2.39.1 diff --git a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch index 767389f..14dd3f9 100644 --- a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 90366cd2ead5a5301aaceed56477d2e6d9f1b3cd Mon Sep 17 00:00:00 2001 +From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ -32,6 +32,11 @@ Rebase notes (7.1.0 rc3): Rebase notes (7.2.0 rc20): - Removed disabling a15mpcore.c as no longer needed +Rebase notes (8.0.0-rc1): +- Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9 +- Inlude qemu/error-report.h in hw/display/cirrus_vga.c +- Change virtiofsd dependency version + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -63,7 +68,7 @@ Merged patches (7.1.0 rc0): hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 3 +- - hw/display/cirrus_vga.c | 5 +- + hw/display/cirrus_vga.c | 7 +- hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + @@ -73,7 +78,7 @@ Merged patches (7.1.0 rc0): target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 19 files changed, 283 insertions(+), 13 deletions(-) + 19 files changed, 285 insertions(+), 13 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak 
create mode 100644 configs/devices/rh-virtio.mak @@ -212,7 +217,7 @@ index 0000000000..69a799adbd +CONFIG_VHOST_USER_FS=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..10cb0a14e0 +index 0000000000..668b2d0e18 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak @@ -0,0 +1,109 @@ @@ -226,7 +231,7 @@ index 0000000000..10cb0a14e0 +CONFIG_ACPI_SMBUS=y +CONFIG_ACPI_VMGENID=y +CONFIG_ACPI_X86=y -+CONFIG_ACPI_X86_ICH=y ++CONFIG_ACPI_ICH9=y +CONFIG_AHCI=y +CONFIG_APIC=y +CONFIG_APM=y @@ -326,10 +331,10 @@ index 0000000000..10cb0a14e0 +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index 92f9f6e000..c5e94c997c 100644 +index b545ba0e4f..a41a16cba7 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build -@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) +@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) @@ -339,7 +344,7 @@ index 92f9f6e000..c5e94c997c 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 64ae4a6899..9b8e782c19 100644 +index d7cc4d3ec1..12d0a60905 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -49,6 +49,8 @@ @@ -367,7 +372,7 @@ index 64ae4a6899..9b8e782c19 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index 9e52fee9e7..87c209a754 100644 +index e37490074f..4431e3731c 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,4 +1,5 @@ @@ -375,13 +380,29 @@ index 9e52fee9e7..87c209a754 100644 
+#softmmu_ss.add(files('core.c', 'cluster.c')) +softmmu_ss.add(files('core.c')) - specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) - specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + softmmu_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 6e8c747c46..1948ebee8e 100644 +index b80f98b6c4..cbde6a8f15 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2946,7 +2946,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -36,6 +36,7 @@ + #include "qemu/module.h" + #include "qemu/units.h" + #include "qemu/log.h" ++#include "qemu/error-report.h" + #include "sysemu/reset.h" + #include "qapi/error.h" + #include "trace.h" +@@ -47,6 +48,7 @@ + #include "qom/object.h" + #include "ui/console.h" + ++ + /* + * TODO: + * - destination write mask support not complete (bits 5..7) +@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -394,10 +415,10 @@ index 6e8c747c46..1948ebee8e 100644 * Also accept 8 MB/16 MB for backward compatibility. 
*/ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 267dbf37db..87fcda4062 100644 +index 41d60921e3..a4af45b4e8 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -199,7 +199,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -407,7 +428,7 @@ index 267dbf37db..87fcda4062 100644 } static const TypeInfo piix3_ide_info = { -@@ -222,6 +223,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -430,10 +451,10 @@ index b92b63bedc..3b6235dde6 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index e26e0a64c1..41492fae79 100644 +index 23d660619f..b75c9aa799 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1824,6 +1824,7 @@ static const E1000Info e1000_devices[] = { +@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -441,7 +462,7 @@ index e26e0a64c1..41492fae79 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1836,6 +1837,7 @@ static const E1000Info e1000_devices[] = { +@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -467,7 +488,7 @@ index 8a4861f45a..fcb5dfe792 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index 793df42e21..cd3c305471 100644 +index 599dc24f0d..905a994c3a 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -52,7 +52,7 @@ softmmu_ss.add(when: 
'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade @@ -480,10 +501,10 @@ index 793df42e21..cd3c305471 100644 endif diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 9a2cef7d05..a528ff9a3d 100644 +index df0c45e523..c154a4dcf2 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -151,6 +151,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) /* CPU models. These are not needed for the AArch64 linux-user build. */ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) @@ -491,7 +512,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -504,6 +505,7 @@ static void cortex_a9_initfn(Object *obj) +@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -499,7 +520,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -528,6 +530,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; @@ -507,7 +528,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -576,6 +579,7 @@ static void cortex_a7_initfn(Object *obj) +@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } @@ -515,7 +536,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_a15_initfn(Object *obj) { -@@ -624,6 +628,7 @@ static void cortex_a15_initfn(Object *obj) +@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ 
-523,7 +544,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1065,6 +1070,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -531,7 +552,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #ifndef TARGET_AARCH64 /* -@@ -1132,6 +1138,7 @@ static void arm_max_initfn(Object *obj) +@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -539,7 +560,7 @@ index 9a2cef7d05..a528ff9a3d 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1147,7 +1154,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -549,7 +570,7 @@ index 9a2cef7d05..a528ff9a3d 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1178,6 +1187,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -620,7 +641,7 @@ index 912b037c63..cd3ff700ac 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index d8a141a023..d086b1c39c 100644 +index 63981bf36b..87a4480c05 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c @@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -653,5 
+674,5 @@ index 3ac7ec9acf..97da1a6424 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -- -2.31.1 +2.39.1 diff --git a/SOURCES/0006-Machine-type-related-general-changes.patch b/SOURCES/0006-Machine-type-related-general-changes.patch index fc2a89d..5dd591f 100644 --- a/SOURCES/0006-Machine-type-related-general-changes.patch +++ b/SOURCES/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 0208f38671b9de4036c0d56142a7f22e5091bae0 Mon Sep 17 00:00:00 2001 +From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -46,28 +46,33 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- 21ed34787b Addd 7.2 compat bits for RHEL 9.1 machine type +- e5c8d5d603 virtio-rng-pci: fix migration compat for vectors +- 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 222 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 46 +++++++- + hw/smbios/smbios.c | 46 ++++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 59 +++++++--- + hw/usb/hcd-xhci-pci.c | 59 ++++++--- hw/usb/hcd-xhci-pci.h | 1 + include/hw/boards.h | 31 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 14 files changed, 360 insertions(+), 23 deletions(-) + 14 files changed, 367 insertions(+), 23 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 0a81f1ad93..dbfb362a8f 100644 +index 63d2113b86..a24b9aac92 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -248,7 +248,7 @@ static bool 
vmstate_test_migrate_acpi_index(void *opaque, int version_id) +@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -77,25 +82,25 @@ index 0a81f1ad93..dbfb362a8f 100644 .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index b871350856..d633300fdc 100644 +index ac626b3bef..4a6e89c7bc 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1619,7 +1619,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, - true, SMBIOS_ENTRY_POINT_TYPE_64); + true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); - smbios_get_tables(MACHINE(vms), NULL, 0, - &smbios_tables, &smbios_tables_len, + /* build the array of physical mem area from base_memmap */ + mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index 8d34caa31d..9edec1ca05 100644 +index cd13b8b0a3..5aa567fad3 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -40,6 +40,228 @@ - #include "hw/virtio/virtio-pci.h" - #include "qom/object_interfaces.h" +@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = { + }; + const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); +/* + * RHEL only: machine types for previous major releases are deprecated @@ -111,6 +116,13 @@ index 8d34caa31d..9edec1ca05 100644 + { "arm-gicv3-common", "force-8-bit-prio", "on" }, + /* hw_compat_rhel_9_1 from hw_compat_7_0 */ + { "nvme-ns", "eui64-default", "on"}, ++ /* hw_compat_rhel_9_1 from hw_compat_7_1 */ ++ { "virtio-device", "queue_reset", "false" }, ++ /* hw_compat_rhel_9_1 bz 2155749 */ ++ { "virtio-rng-pci", "vectors", "0" }, ++ /* hw_compat_rhel_9_1 bz 2162569 */ ++ { "virtio-rng-pci-transitional", "vectors", "0" }, ++ { 
"virtio-rng-pci-non-transitional", "vectors", "0" }, +}; +const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + @@ -321,7 +333,7 @@ index 8d34caa31d..9edec1ca05 100644 + GlobalProperty hw_compat_7_1[] = { { "virtio-device", "queue_reset", "false" }, - }; + { "virtio-rng-pci", "vectors", "0" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 2a5437d803..0db2c2b2a1 100644 --- a/hw/display/vga-isa.c @@ -336,10 +348,10 @@ index 2a5437d803..0db2c2b2a1 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0ad0ed1603..0985ff67d2 100644 +index 30eedd62a3..14a794081e 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -187,6 +187,8 @@ static void pc_init1(MachineState *machine, +@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -349,10 +361,10 @@ index 0ad0ed1603..0985ff67d2 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a496bd6e74..ea582254e3 100644 +index 797ba347fd..dc0ba5f9e7 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine) +@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -362,7 +374,7 @@ index a496bd6e74..ea582254e3 100644 } diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 700b1b66b6..13693aeb4f 100644 +index 5a5aaf868d..3d473d5869 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque) @@ -385,10 +397,10 @@ index 700b1b66b6..13693aeb4f 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index b4243de735..c5ad69237e 100644 +index d2007e70fb..319eae9e9d 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c 
-@@ -57,6 +57,9 @@ static bool smbios_legacy = true; +@@ -58,6 +58,9 @@ static bool smbios_legacy = true; static bool smbios_uuid_encoded = true; /* end: legacy structures & constants for <= 2.0 machines */ @@ -398,7 +410,7 @@ index b4243de735..c5ad69237e 100644 uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -669,7 +672,7 @@ static void smbios_build_type_1_table(void) +@@ -670,7 +673,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -407,7 +419,7 @@ index b4243de735..c5ad69237e 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -977,7 +980,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -419,7 +431,7 @@ index b4243de735..c5ad69237e 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -998,11 +1004,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -579,10 +591,10 @@ index 643d4643e4..529bad9366 100644 dc->vmsd = &vmstate_xhci_pci; set_bit(DEVICE_CATEGORY_USB, dc->categories); diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h -index c193f79443..086a1feb1e 100644 +index 08f70ce97c..1be7527c1b 100644 --- a/hw/usb/hcd-xhci-pci.h +++ b/hw/usb/hcd-xhci-pci.h -@@ -39,6 +39,7 @@ typedef struct XHCIPciState { +@@ -40,6 +40,7 @@ typedef struct XHCIPciState { XHCIState xhci; OnOffAuto msi; OnOffAuto msix; @@ -591,10 +603,10 @@ index c193f79443..086a1feb1e 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index 90f1dd3aeb..2209d4e416 100644 +index 6fbbfd56c8..c5a965d27f 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -454,4 
+454,35 @@ extern const size_t hw_compat_2_2_len; +@@ -459,4 +459,35 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -647,10 +659,10 @@ index 7f3259a630..d24b3ccd32 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index c95333514e..3754eaa97d 100644 +index 8206d5405a..908a275736 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -112,6 +112,9 @@ struct PCMachineClass { +@@ -111,6 +111,9 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; @@ -661,5 +673,5 @@ index c95333514e..3754eaa97d 100644 /* RAM / address space compat: */ bool gigabyte_align; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0007-Add-aarch64-machine-types.patch b/SOURCES/0007-Add-aarch64-machine-types.patch index 06611e7..f47bbd0 100644 --- a/SOURCES/0007-Add-aarch64-machine-types.patch +++ b/SOURCES/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 8501581c99760ed8a800d0c98eeb17a4bf450366 Mon Sep 17 00:00:00 2001 +From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -26,6 +26,9 @@ Rebase notes (7.1.0 rc3): Rebase notes (7.2.0 rc0): - Disabled cortex-a35 +Rebase notes (8.0.0-rc1): +- Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -49,23 +52,27 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type +- d97cd7c513 redhat: fix virt-rhel9.2.0 compat props --- - hw/arm/virt.c | 237 
++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ + target/arm/arm-qmp-cmds.c | 2 + target/arm/cpu-qom.h | 1 + target/arm/cpu.c | 5 + target/arm/cpu.h | 2 + target/arm/cpu64.c | 16 ++- target/arm/cpu_tcg.c | 12 +- - target/arm/helper.c | 2 + tests/qtest/arm-cpu-features.c | 6 + - 9 files changed, 277 insertions(+), 12 deletions(-) + 9 files changed, 289 insertions(+), 14 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d633300fdc..dfcab40a73 100644 +index 4a6e89c7bc..1ae1654be5 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -80,6 +80,7 @@ +@@ -81,6 +81,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -73,7 +80,7 @@ index d633300fdc..dfcab40a73 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -106,7 +107,48 @@ +@@ -107,7 +108,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -123,7 +130,7 @@ index d633300fdc..dfcab40a73 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -197,15 +239,19 @@ static const int a15irqmap[] = { +@@ -204,16 +246,20 @@ static const int a15irqmap[] = { }; static const char *valid_cpus[] = { @@ -132,6 +139,7 @@ index d633300fdc..dfcab40a73 100644 ARM_CPU_TYPE_NAME("cortex-a15"), ARM_CPU_TYPE_NAME("cortex-a35"), ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a55"), +#endif /* disabled for RHEL */ ARM_CPU_TYPE_NAME("cortex-a57"), +#if 0 /* Disabled for Red Hat Enterprise Linux */ @@ -143,7 +151,7 @@ index d633300fdc..dfcab40a73 100644 ARM_CPU_TYPE_NAME("host"), ARM_CPU_TYPE_NAME("max"), }; -@@ -2290,6 +2336,7 @@ static void machvirt_init(MachineState *machine) +@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); 
} @@ -151,7 +159,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2317,6 +2364,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -159,7 +167,25 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2346,6 +2394,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) +@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) + + vms->highmem = value; + } +- ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_compact_highmem(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) + + vms->highmem_mmio = value; + } +- ++#endif /* disabled for RHEL */ + + static bool virt_get_its(Object *obj, Error **errp) + { +@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -167,7 +193,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2359,6 +2408,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -175,7 +201,7 @@ index d633300fdc..dfcab40a73 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2442,6 +2492,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -183,7 +209,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_mte(Object *obj, Error **errp) { 
VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2455,6 +2506,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -191,7 +217,7 @@ index d633300fdc..dfcab40a73 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2886,6 +2938,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2988,6 +3040,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 0 : requested_pa_size; } @@ -199,7 +225,7 @@ index d633300fdc..dfcab40a73 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3294,3 +3347,185 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -312,6 +338,7 @@ index d633300fdc..dfcab40a73 100644 + + /* High memory is enabled by default */ + vms->highmem = true; ++ vms->highmem_compact = !vmc->no_highmem_compact; + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; @@ -374,22 +401,31 @@ index d633300fdc..dfcab40a73 100644 +} +type_init(rhel_machine_init); + ++static void rhel920_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++ +static void rhel900_virt_options(MachineClass *mc) +{ + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel920_virt_options(mc); ++ + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; ++ /* Compact layout for high memory regions was introduced with 9.2.0 */ ++ 
vmc->no_highmem_compact = true; +} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) ++DEFINE_RHEL_MACHINE(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 6ec479ca2b..22b54ec510 100644 +index e1ddbea96b..81c2363a40 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -180,9 +180,17 @@ struct VirtMachineState { +@@ -187,9 +187,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -407,8 +443,28 @@ index 6ec479ca2b..22b54ec510 100644 void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); +diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c +index c8fa524002..3aa089abf3 100644 +--- a/target/arm/arm-qmp-cmds.c ++++ b/target/arm/arm-qmp-cmds.c +@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h -index 64c44cef2d..82e97249bc 100644 +index 514c22ced9..f789173451 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { @@ -420,10 +476,10 @@ index 64c44cef2d..82e97249bc 100644 void arm_cpu_register(const ARMCPUInfo *info); diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 38d066c294..a845814bfb 100644 +index 5182ed0c91..6740a8b940 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2250,8 +2250,13 @@ static void arm_cpu_instance_init(Object 
*obj) +@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -438,7 +494,7 @@ index 38d066c294..a845814bfb 100644 void arm_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 9aeed3c848..f9f504d89e 100644 +index c097cae988..829d4a2328 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -34,6 +34,8 @@ @@ -451,10 +507,10 @@ index 9aeed3c848..f9f504d89e 100644 #define EXCP_SWI 2 /* software interrupt */ #define EXCP_PREFETCH_ABORT 3 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 3d74f134f5..4b330a52b5 100644 +index 0fb07cc7b6..47459627fb 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c -@@ -36,6 +36,7 @@ +@@ -31,6 +31,7 @@ #include "hw/qdev-properties.h" #include "internals.h" @@ -462,7 +518,7 @@ index 3d74f134f5..4b330a52b5 100644 static void aarch64_a35_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -115,6 +116,7 @@ static void aarch64_a35_initfn(Object *obj) +@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj) /* These values are the same with A53/A57/A72. 
*/ define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -470,7 +526,7 @@ index 3d74f134f5..4b330a52b5 100644 void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { -@@ -735,6 +737,7 @@ static void aarch64_a57_initfn(Object *obj) +@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj) define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -478,15 +534,15 @@ index 3d74f134f5..4b330a52b5 100644 static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1033,6 +1036,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) - /* From D5.1 AArch64 PMU register summary */ - cpu->isar.reset_pmcr_el0 = 0x410c3000; +@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) + + define_neoverse_n1_cp_reginfo(cpu); } +#endif /* disabled for RHEL */ static void aarch64_host_initfn(Object *obj) { -@@ -1240,13 +1244,18 @@ static void aarch64_max_initfn(Object *obj) +@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj) } static const ARMCPUInfo aarch64_cpus[] = { @@ -498,6 +554,7 @@ index 3d74f134f5..4b330a52b5 100644 + .deprecation_note = RHEL_CPU_DEPRECATION }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, @@ -506,7 +563,7 @@ index 3d74f134f5..4b330a52b5 100644 { .name = "max", .initfn = aarch64_max_initfn }, #if defined(CONFIG_KVM) || defined(CONFIG_HVF) { .name = "host", .initfn = aarch64_host_initfn }, -@@ -1318,8 +1327,13 @@ static void aarch64_cpu_instance_init(Object *obj) +@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -521,10 +578,10 @@ index 3d74f134f5..4b330a52b5 100644 void aarch64_cpu_register(const ARMCPUInfo 
*info) diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index a528ff9a3d..053f70e399 100644 +index c154a4dcf2..f29425b656 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -148,10 +148,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) } #endif /* !CONFIG_USER_ONLY */ @@ -536,7 +593,7 @@ index a528ff9a3d..053f70e399 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -505,7 +505,6 @@ static void cortex_a9_initfn(Object *obj) +@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -544,7 +601,7 @@ index a528ff9a3d..053f70e399 100644 #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -530,7 +529,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; @@ -552,7 +609,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -579,7 +577,6 @@ static void cortex_a7_initfn(Object *obj) +@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } @@ -560,7 +617,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_a15_initfn(Object *obj) { -@@ -628,7 +625,6 @@ static void cortex_a15_initfn(Object *obj) +@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ -568,7 +625,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1070,7 +1066,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ 
-1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -576,7 +633,7 @@ index a528ff9a3d..053f70e399 100644 #ifndef TARGET_AARCH64 /* -@@ -1138,7 +1133,6 @@ static void arm_max_initfn(Object *obj) +@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -584,7 +641,7 @@ index a528ff9a3d..053f70e399 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1154,9 +1148,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -594,7 +651,7 @@ index a528ff9a3d..053f70e399 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1187,7 +1179,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -602,36 +659,16 @@ index a528ff9a3d..053f70e399 100644 #ifndef TARGET_AARCH64 { .name = "max", .initfn = arm_max_initfn }, #endif -@@ -1215,3 +1206,4 @@ static void arm_tcg_cpu_register_types(void) +@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void) type_init(arm_tcg_cpu_register_types) #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ -diff --git a/target/arm/helper.c b/target/arm/helper.c -index d8c8223ec3..ad9d235773 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -8476,6 +8476,7 @@ void arm_cpu_list(void) - static void 
arm_cpu_add_definition(gpointer data, gpointer user_data) - { - ObjectClass *oc = data; -+ CPUClass *cc = CPU_CLASS(oc); - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; -@@ -8485,6 +8486,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); -+ info->deprecated = !!cc->deprecation_note; - - QAPI_LIST_PREPEND(*cpu_list, info); - } diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index 5a14527386..a3579fc303 100644 +index 1cb08138ad..834497dfec 100644 --- a/tests/qtest/arm-cpu-features.c +++ b/tests/qtest/arm-cpu-features.c -@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data) assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); /* Test expected feature presence/absence for some cpu types */ @@ -642,7 +679,7 @@ index 5a14527386..a3579fc303 100644 /* Enabling and disabling pmu should always work. 
*/ assert_has_feature_enabled(qts, "max", "pmu"); -@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data) assert_has_feature_enabled(qts, "cortex-a57", "pmu"); assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); @@ -650,7 +687,7 @@ index 5a14527386..a3579fc303 100644 assert_has_feature_enabled(qts, "a64fx", "pmu"); assert_has_feature_enabled(qts, "a64fx", "aarch64"); /* -@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data) "{ 'sve384': true }"); assert_error(qts, "a64fx", "cannot enable sve640", "{ 'sve640': true }"); @@ -658,7 +695,7 @@ index 5a14527386..a3579fc303 100644 sve_tests_default(qts, "max"); pauth_tests_default(qts, "max"); -@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) +@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) QDict *resp; char *error; @@ -671,5 +708,5 @@ index 5a14527386..a3579fc303 100644 assert_has_feature_enabled(qts, "host", "aarch64"); -- -2.31.1 +2.39.1 diff --git a/SOURCES/0008-Add-ppc64-machine-types.patch b/SOURCES/0008-Add-ppc64-machine-types.patch index a3cb0a3..ab78cae 100644 --- a/SOURCES/0008-Add-ppc64-machine-types.patch +++ b/SOURCES/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 2c523f1b6c9470e1cd517ba99e414cde02727e16 Mon Sep 17 00:00:00 2001 +From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -34,10 +34,10 @@ Merged patches (7.1.0 rc0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 66b414d2e9..499eb49253 100644 +index 4921198b9d..e24b3e22e3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1633,6 +1633,9 @@ static void 
spapr_machine_reset(MachineState *machine, ShutdownCause reason) +@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -47,7 +47,7 @@ index 66b414d2e9..499eb49253 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3347,6 +3350,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index 66b414d2e9..499eb49253 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3425,6 +3442,12 @@ static void spapr_instance_init(Object *obj) +@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index 66b414d2e9..499eb49253 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4682,6 +4705,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index 66b414d2e9..499eb49253 100644 } static const TypeInfo spapr_machine_info = { -@@ -4733,6 +4757,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-7.2 + * pseries-8.0 */ -@@ -4882,6 +4907,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ 
-4894,6 +4919,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,7 +105,7 @@ index 66b414d2e9..499eb49253 100644 /* * pseries-4.0 -@@ -4901,6 +4927,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -114,7 +114,7 @@ index 66b414d2e9..499eb49253 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5228,6 +5256,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 04a95669ab..d5f4cf5e03 100644 +index 5c8aabd444..04489d5808 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -154,6 +154,7 @@ struct SpaprMachineClass { +@@ -155,6 +155,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; @@ -386,7 +386,7 @@ index 04a95669ab..d5f4cf5e03 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -256,6 +257,9 @@ struct SpaprMachineState { +@@ -257,6 +258,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 81d4263a07..508fbed90b 100644 +index 557d736dab..6646ec1c27 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1467,6 +1467,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) 
+@@ -1482,6 +1482,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,10 +446,10 @@ index 81d4263a07..508fbed90b 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 7c25348b7b..83671c955f 100644 +index 78f6fc50cd..68d06c3f8f 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -89,6 +89,7 @@ static int cap_ppc_nested_kvm_hv; +@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv; static int cap_large_decr; static int cap_fwnmi; static int cap_rpt_invalidate; @@ -457,7 +457,7 @@ index 7c25348b7b..83671c955f 100644 static uint32_t debug_inst_opcode; -@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -465,7 +465,7 @@ index 7c25348b7b..83671c955f 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2570,6 +2572,16 @@ int kvmppc_has_cap_rpt_invalidate(void) +@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void) return cap_rpt_invalidate; } @@ -482,7 +482,7 @@ index 7c25348b7b..83671c955f 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2970,3 +2982,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void) void kvm_arch_accel_class_init(ObjectClass *oc) { } @@ -502,10 +502,10 @@ index 7c25348b7b..83671c955f 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index ee9325bf9a..20dbb95989 100644 +index 5fd9753953..b5ebfe2be0 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h -@@ -40,6 +40,7 @@ int 
kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); +@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); @@ -513,7 +513,7 @@ index ee9325bf9a..20dbb95989 100644 #ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); +@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void); int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); int kvmppc_has_cap_rpt_invalidate(void); int kvmppc_enable_hwrng(void); @@ -522,7 +522,7 @@ index ee9325bf9a..20dbb95989 100644 int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) +@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) return false; } @@ -540,5 +540,5 @@ index ee9325bf9a..20dbb95989 100644 { return -1; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0009-Add-s390x-machine-types.patch b/SOURCES/0009-Add-s390x-machine-types.patch index 5860009..07dfb57 100644 --- a/SOURCES/0009-Add-s390x-machine-types.patch +++ b/SOURCES/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 1973257ed781a93943f27f1518933e8c09c50f88 Mon Sep 17 00:00:00 2001 +From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -30,45 +30,72 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update +- a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type +- ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 --- - 
hw/s390x/s390-virtio-ccw.c | 108 +++++++++++++++++++++++++++++++ - target/s390x/cpu_models.c | 11 ++++ + hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++ + target/s390x/cpu_models.c | 11 +++ target/s390x/cpu_models.h | 2 + target/s390x/cpu_models_sysemu.c | 2 + - 4 files changed, 123 insertions(+) + 4 files changed, 158 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 2e64ffab45..8d5221fbb1 100644 +index 503f212a31..dcd3b966b0 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -823,6 +823,7 @@ bool css_migration_enabled(void) +@@ -826,6 +826,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_7_2_instance_options(MachineState *machine) + static void ccw_machine_8_0_instance_options(MachineState *machine) { } -@@ -1186,6 +1187,113 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + + ++static void ccw_machine_rhel920_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel920_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); ++ +static void ccw_machine_rhel900_instance_options(MachineState *machine) +{ + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; + ++ ccw_machine_rhel920_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); +} + +static void ccw_machine_rhel900_class_options(MachineClass *mc) +{ ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", 
"off", }, ++ }; ++ ++ ccw_machine_rhel920_class_options(mc); ++ ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); ++ s390mc->max_threads = S390_MAX_CPUS; +} -+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false); + +static void ccw_machine_rhel860_instance_options(MachineState *machine) +{ @@ -78,7 +105,14 @@ index 2e64ffab45..8d5221fbb1 100644 + +static void ccw_machine_rhel860_class_options(MachineClass *mc) +{ ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "on", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", }, ++ }; ++ + ccw_machine_rhel900_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = rhel_old_machine_deprecation; @@ -102,8 +136,14 @@ index 2e64ffab45..8d5221fbb1 100644 + +static void ccw_machine_rhel850_class_options(MachineClass *mc) +{ ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ + ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->smp_props.prefer_sockets = true; +} +DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); @@ -164,10 +204,10 @@ index 2e64ffab45..8d5221fbb1 100644 static void ccw_machine_register_types(void) { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index c3a4f80633..739770dc15 100644 +index 457b5cb10c..ff6b9463cb 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c -@@ 
-45,6 +45,9 @@ +@@ -46,6 +46,9 @@ * of a following release have been a superset of the previous release. With * generation 15 one base feature and one optional feature have been deprecated. */ @@ -177,7 +217,7 @@ index c3a4f80633..739770dc15 100644 static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -854,22 +857,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -222,7 +262,7 @@ index fb1adc8b21..d76745afa9 100644 /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index d086b1c39c..1b9cc66405 100644 +index 87a4480c05..28c1b0486c 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c @@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) @@ -242,5 +282,5 @@ index d086b1c39c..1b9cc66405 100644 if (cpu_list_data->model) { Object *obj; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0010-Add-x86_64-machine-types.patch b/SOURCES/0010-Add-x86_64-machine-types.patch index 181342a..9685338 100644 --- a/SOURCES/0010-Add-x86_64-machine-types.patch +++ b/SOURCES/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 0935624ccdddc286d6eeeb0c1b70d78983c21aa2 Mon Sep 17 00:00:00 2001 +From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -13,6 +13,9 @@ Rebase notes (6.1.0): Rebase notes (7.0.0): - Reset alias for all machine-types except latest one +Rebase notes (8.0.0-rc1): +- remove legacy_no_rng_seed usage (removed upstream) + Merged patches (6.1.0): - 
59c284ad3b x86: Add x86 rhel8.5 machine types - a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default @@ -39,24 +42,26 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- f33ca8aed4 x86: rhel 9.2.0 machine type --- - hw/i386/pc.c | 147 ++++++++++++++++++++++- - hw/i386/pc_piix.c | 86 +++++++++++++- - hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++++++++++- - hw/s390x/s390-virtio-ccw.c | 1 + + hw/i386/pc.c | 147 +++++++++++++++++++++- + hw/i386/pc_piix.c | 86 ++++++++++++- + hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 27 +++++ + include/hw/i386/pc.h | 27 ++++ target/i386/cpu.c | 21 ++++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 10 files changed, 521 insertions(+), 7 deletions(-) + 9 files changed, 538 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 546b703cb4..c7b1350e64 100644 +index 1489abf010..8abb1f872e 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -393,6 +393,149 @@ GlobalProperty pc_compat_1_4[] = { +@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -206,7 +211,7 @@ index 546b703cb4..c7b1350e64 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1907,6 +2050,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -214,7 +219,7 @@ index 546b703cb4..c7b1350e64 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1917,7 +2061,8 @@ static void pc_machine_class_init(ObjectClass 
*oc, void *data) +@@ -1954,7 +2098,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -225,10 +230,10 @@ index 546b703cb4..c7b1350e64 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0985ff67d2..173a1fd10b 100644 +index 14a794081e..3e330fd36f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -53,6 +53,7 @@ +@@ -54,6 +54,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -236,7 +241,7 @@ index 0985ff67d2..173a1fd10b 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -184,8 +185,8 @@ static void pc_init1(MachineState *machine, +@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -247,7 +252,7 @@ index 0985ff67d2..173a1fd10b 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -334,6 +335,7 @@ static void pc_init1(MachineState *machine, +@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -255,7 +260,7 @@ index 0985ff67d2..173a1fd10b 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -896,3 +898,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -304,7 +309,7 @@ index 0985ff67d2..173a1fd10b 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; -+ pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -340,10 +345,10 @@ index 0985ff67d2..173a1fd10b 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index ea582254e3..97c3630021 100644 +index dc0ba5f9e7..98601bb76f 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) +@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -354,7 +359,7 @@ index ea582254e3..97c3630021 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -352,6 +352,7 @@ static void pc_q35_init(MachineState *machine) +@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -362,7 +367,7 @@ index ea582254e3..97c3630021 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -666,3 +667,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, 
"pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -391,6 +396,23 @@ index ea582254e3..97c3630021 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel920(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel920_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.2.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, ++ pc_q35_machine_rhel920_options); ++ +static void pc_q35_init_rhel900(MachineState *machine) +{ + pc_q35_init(machine); @@ -399,11 +421,12 @@ index ea582254e3..97c3630021 100644 +static void pc_q35_machine_rhel900_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel920_options(m); + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; -+ pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -595,23 +618,11 @@ index ea582254e3..97c3630021 100644 + +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 8d5221fbb1..ba640e3d9e 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1213,6 +1213,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) - static void ccw_machine_rhel860_class_options(MachineClass *mc) - { - ccw_machine_rhel900_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); - - 
/* All RHEL machines for prior major releases are deprecated */ - mc->deprecation_reason = rhel_old_machine_deprecation; diff --git a/include/hw/boards.h b/include/hw/boards.h -index 2209d4e416..fd75f551b1 100644 +index c5a965d27f..5e7446ee40 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -266,6 +266,8 @@ struct MachineClass { +@@ -268,6 +268,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -621,12 +632,12 @@ index 2209d4e416..fd75f551b1 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 3754eaa97d..4266fe2fdb 100644 +index 908a275736..4376f64a47 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_5_len; - extern GlobalProperty pc_compat_1_4[]; - extern const size_t pc_compat_1_4_len; +@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len; + + int pc_machine_kvm_type(MachineState *machine, const char *vm_type); +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; @@ -659,10 +670,10 @@ index 3754eaa97d..4266fe2fdb 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 22b681ca37..f7c526cbe6 100644 +index 6576287e5b..0ef2bf1b93 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1832,9 +1832,13 @@ static const CPUCaches epyc_milan_cache_info = { +@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = { * PT in VMX operation */ @@ -676,7 +687,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -1855,6 +1859,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "phenom", @@ -684,7 +695,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, 
.vendor = CPUID_VENDOR_AMD, .family = 16, -@@ -1887,6 +1892,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "core2duo", @@ -692,7 +703,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1929,6 +1935,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm64", @@ -700,7 +711,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -1970,6 +1977,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "qemu32", @@ -708,7 +719,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 4, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1984,6 +1992,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm32", @@ -716,7 +727,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -2014,6 +2023,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "coreduo", @@ -724,7 +735,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2047,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "486", @@ -732,7 +743,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 4, -@@ -2059,6 +2070,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium", @@ -740,7 +751,7 @@ index 22b681ca37..f7c526cbe6 100644 
.level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 5, -@@ -2071,6 +2083,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium2", @@ -748,7 +759,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 2, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2083,6 +2096,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium3", @@ -756,7 +767,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 3, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2095,6 +2109,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "athlon", @@ -764,7 +775,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 2, .vendor = CPUID_VENDOR_AMD, .family = 6, -@@ -2110,6 +2125,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "n270", @@ -772,7 +783,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2135,6 +2151,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Conroe", @@ -780,7 +791,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2175,6 +2192,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Penryn", @@ -788,7 +799,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -3762,6 +3780,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G1", @@ -796,7 +807,7 @@ index 
22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3782,6 +3801,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G2", @@ -804,7 +815,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3804,6 +3824,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G3", @@ -825,10 +836,10 @@ index 7237378a7d..7b8a3d5af0 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index a213209379..81526a1575 100644 +index de531842f6..8d82304609 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3707,6 +3707,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3822,6 +3822,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -836,7 +847,7 @@ index a213209379..81526a1575 100644 kvm_msr_buf_reset(cpu); -@@ -4062,6 +4063,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -847,7 +858,7 @@ index a213209379..81526a1575 100644 case MSR_KVM_ASYNC_PF_INT: env->async_pf_int_msr = msrs[i].data; diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c -index bc7b7dfc39..96e6dee3a1 100644 +index 78f1cf8186..ac954c9b06 100644 --- a/tests/qtest/pvpanic-test.c +++ b/tests/qtest/pvpanic-test.c @@ -17,7 +17,7 @@ static void test_panic_nopause(void) @@ -870,5 +881,5 @@ index bc7b7dfc39..96e6dee3a1 100644 val = qtest_inb(qts, 0x505); g_assert_cmpuint(val, ==, 3); -- -2.31.1 +2.39.1 diff --git a/SOURCES/0011-Enable-make-check.patch b/SOURCES/0011-Enable-make-check.patch index d0be8e6..cc91302 100644 --- a/SOURCES/0011-Enable-make-check.patch +++ 
b/SOURCES/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From badfb1290c8eea8a2e1769b2392c7899d5077698 Mon Sep 17 00:00:00 2001 +From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -27,28 +27,37 @@ Rebase changes (7.0.0): Rebase changes (7.1.0 rc0): - Disable bcm2835-dma-test (added upstream) +Rebase changes (8.0.0-rc1): +- Removed chunks for disabling bios-table-test (protected upstream) + +Rebase change (8.0.0-rc2): +- Disable new qemu-iotests execution +- Revert change in tco qtest (blocking test run) + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again Merged patches (7.1.0 rc0): - 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57 --- - .distro/qemu-kvm.spec.template | 5 ++--- - tests/avocado/replay_kernel.py | 2 +- - tests/avocado/reverse_debugging.py | 2 +- - tests/avocado/tcg_plugins.py | 6 +++--- - tests/qtest/fuzz-e1000e-test.c | 2 +- - tests/qtest/fuzz-virtio-scsi-test.c | 2 +- - tests/qtest/intel-hda-test.c | 2 +- - tests/qtest/libqos/meson.build | 2 +- - tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 7 +------ - tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - tests/qtest/virtio-net-failover.c | 1 + - 12 files changed, 18 insertions(+), 19 deletions(-) + .distro/qemu-kvm.spec.template | 4 ++-- + tests/avocado/replay_kernel.py | 2 +- + tests/avocado/reverse_debugging.py | 2 +- + tests/avocado/tcg_plugins.py | 6 ++--- + tests/qemu-iotests/meson.build | 34 ++++++++++++++--------------- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/intel-hda-test.c | 2 +- + tests/qtest/libqos/meson.build | 2 +- + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 2 -- + tests/qtest/tco-test.c | 2 +- + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/virtio-net-failover.c | 1 + + 14 files changed, 35 insertions(+), 32 
deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py -index 00a26e4a0c..fe5ecf238a 100644 +index f13456e1ec..2fee270a42 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py @@ -147,7 +147,7 @@ def test_aarch64_virt(self): @@ -61,10 +70,10 @@ index 00a26e4a0c..fe5ecf238a 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index d2921e70c3..66d185ed42 100644 +index 680c314cfc..71eccb8fb6 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py -@@ -198,7 +198,7 @@ def test_aarch64_virt(self): +@@ -206,7 +206,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -104,6 +113,49 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build +index 9735071a29..32002335f4 100644 +--- a/tests/qemu-iotests/meson.build ++++ b/tests/qemu-iotests/meson.build +@@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats + check: true, + ) + +- foreach item: rc.stdout().strip().split() +- args = [qemu_iotests_check_cmd, +- '-tap', '-' + format, item, +- '--source-dir', meson.current_source_dir(), +- '--build-dir', meson.current_build_dir()] +- # Some individual tests take as long as 45 seconds +- # Bump the timeout to 3 minutes for some headroom +- # on slow machines to minimize spurious failures +- test('io-' + format + '-' + item, +- python, +- args: args, +- depends: qemu_iotests_binaries, +- env: qemu_iotests_env, +- protocol: 'tap', +- timeout: 180, +- suite: suites) +- endforeach ++# foreach item: rc.stdout().strip().split() ++# args = [qemu_iotests_check_cmd, ++# '-tap', '-' + format, item, ++# '--source-dir', 
meson.current_source_dir(), ++# '--build-dir', meson.current_build_dir()] ++# # Some individual tests take as long as 45 seconds ++# # Bump the timeout to 3 minutes for some headroom ++# # on slow machines to minimize spurious failures ++# test('io-' + format + '-' + item, ++# python, ++# args: args, ++# depends: qemu_iotests_binaries, ++# env: qemu_iotests_env, ++# protocol: 'tap', ++# timeout: 180, ++# suite: suites) ++# endforeach + endforeach diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c index 5052883fb6..b5286f4b12 100644 --- a/tests/qtest/fuzz-e1000e-test.c @@ -144,10 +196,10 @@ index d4a8db6fd6..1a796ec15a 100644 qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index 32f028872c..1e78a1a055 100644 +index cc209a8de5..42a7c529c9 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build -@@ -43,7 +43,7 @@ libqos_srcs = files( +@@ -44,7 +44,7 @@ libqos_srcs = files( 'virtio-rng.c', 'virtio-scsi.c', 'virtio-serial.c', @@ -170,18 +222,10 @@ index 8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index c07a5b1a5f..9df3f9f8b9 100644 +index 85ea4e8d99..893afc8eeb 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -82,7 +82,6 @@ qtests_i386 = \ - config_all_devices.has_key('CONFIG_Q35') and \ - config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ - slirp.found() ? ['virtio-net-failover'] : []) + \ -- (unpack_edk2_blobs ? 
['bios-tables-test'] : []) + \ - qtests_pci + \ - qtests_cxl + \ - ['fdc-test', -@@ -96,7 +95,6 @@ qtests_i386 = \ +@@ -94,7 +94,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -189,24 +233,7 @@ index c07a5b1a5f..9df3f9f8b9 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -209,15 +207,13 @@ qtests_arm = \ - - # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional - qtests_aarch64 = \ -- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ - (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \ - (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ - (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \ - ['arm-cpu-features', - 'numa-test', - 'boot-serial-test', -- 'migration-test', -- 'bcm2835-dma-test'] -+ 'migration-test'] - - qtests_s390x = \ - (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ -@@ -225,7 +221,6 @@ qtests_s390x = \ +@@ -223,7 +222,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', @@ -214,6 +241,19 @@ index c07a5b1a5f..9df3f9f8b9 100644 'virtio-ccw-test', 'cpu-plug-test', 'migration-test'] +diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c +index 0547d41173..3756ce82d8 100644 +--- a/tests/qtest/tco-test.c ++++ b/tests/qtest/tco-test.c +@@ -60,7 +60,7 @@ static void test_init(TestData *d) + QTestState *qs; + + qs = qtest_initf("-machine q35 %s %s", +- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "", ++ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false", + !d->args ? 
"" : d->args); + qtest_irq_intercept_in(qs, "ioapic"); + diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c index 10ef9d2a91..3855873050 100644 --- a/tests/qtest/usb-hcd-xhci-test.c @@ -257,5 +297,5 @@ index 4a809590bf..1bf3fa641c 100644 "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " -- -2.31.1 +2.39.1 diff --git a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 477a75d..430959b 100644 --- a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 0804844e4755377be6d2ebad578794ad9f4f3f31 Mon Sep 17 00:00:00 2001 +From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,7 +32,7 @@ Signed-off-by: Bandan Das 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 939dcc3d4a..acbc6673ce 100644 +index ec9a854361..a779053be3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -48,6 +48,9 @@ @@ -77,7 +77,7 @@ index 939dcc3d4a..acbc6673ce 100644 if (!vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3293,6 +3317,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -88,7 +88,7 @@ index 939dcc3d4a..acbc6673ce 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 7c236a52f4..7b7d036a8f 100644 +index 177abcc8fb..45235d38ba 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -140,6 +140,7 @@ struct 
VFIOPCIDevice { @@ -100,5 +100,5 @@ index 7c236a52f4..7b7d036a8f 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0013-Add-support-statement-to-help-output.patch b/SOURCES/0013-Add-support-statement-to-help-output.patch index 022f194..25db0b8 100644 --- a/SOURCES/0013-Add-support-statement-to-help-output.patch +++ b/SOURCES/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 283a0e258dc2f3b83c58e6f948bafe430cd2c1d5 Mon Sep 17 00:00:00 2001 +From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,7 +21,7 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 5115221efe..17188df528 100644 +index ea20b23e4c..ad4173138d 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -834,9 +834,17 @@ static void version(void) @@ -51,5 +51,5 @@ index 5115221efe..17188df528 100644 } -- -2.31.1 +2.39.1 diff --git a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index e39555b..b97c844 100644 --- a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From d8ded821aa698b3b03bd9089fbd6c2b33da87b9e Mon Sep 17 00:00:00 2001 +From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. 
|I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 7f99d15b23..ea02ca3a45 100644 +index 59bdf67a2c..52b49f1f6a 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3300,11 +3300,11 @@ SRST +@@ -3296,11 +3296,11 @@ SRST :: @@ -57,5 +57,5 @@ index 7f99d15b23..ea02ca3a45 100644 ``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. -- -2.31.1 +2.39.1 diff --git a/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch deleted file mode 100644 index 2bedb0b..0000000 --- a/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 9c6acadb444c9300d7c18b6939ce4f96484aeacc Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Wed, 6 Feb 2019 03:58:56 +0000 -Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts - -RH-Author: David Gibson -Message-id: <20190206035856.19058-1-dgibson@redhat.com> -Patchwork-id: 84246 -O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts -Bugzilla: 1653590 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Serhii Popovych -RH-Acked-by: Thomas Huth - -Most current POWER guests require 64kiB page support, so that's the default -for the cap-hpt-max-pagesize option in qemu which limits available guest -page sizes. We warn if the value is set smaller than that, but don't -outright fail upstream, because we need to allow for the possibility of -guest (and/or host) kernels configured for 4kiB page sizes. - -Downstream, however, we simply don't support 4kiB pagesize configured -kernels in guest or host, so we can have qemu simply error out in this -situation. - -Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified - it failed immediately with a qemu error - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr_caps.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index b4283055c1..59b88aadff 100644 ---- a/hw/ppc/spapr_caps.c -+++ b/hw/ppc/spapr_caps.c -@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, - static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, - uint8_t val, Error **errp) - { -+#if 0 /* disabled for RHEL */ - if (val < 12) { - error_setg(errp, "Require at least 4kiB hpt-max-page-size"); - return; - } else if (val < 16) { - warn_report("Many guests require at least 64kiB hpt-max-page-size"); - } -+#else /* Only page sizes >=64kiB supported for RHEL */ -+ if (val < 16) { -+ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); -+ return; -+ } -+#endif - - spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); - } --- -2.31.1 - diff --git a/SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 92% rename from SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index cee5476..1e2f8e1 100644 --- a/SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From 02fde2a0cbd679ebd4104fe5522572c31ec23abd Mon Sep 17 00:00:00 2001 +From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 4d6666d3ff..d2ba263e9d 100644 +index 30fd53fa64..22084730f9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1336,6 +1336,12 @@ static int coroutine_fn 
qcow2_do_open(BlockDriverState *bs, QDict *options, +@@ -1337,6 +1337,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOTSUP; goto fail; } @@ -61,7 +61,7 @@ index 4d6666d3ff..d2ba263e9d 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index cc9f1a5891..6a13757177 100644 +index 6b32c7fbfa..6ddda2ee64 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -83,6 +83,7 @@ _filter_qemu() @@ -73,5 +73,5 @@ index cc9f1a5891..6a13757177 100644 } -- -2.31.1 +2.39.1 diff --git a/SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch b/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch similarity index 81% rename from SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch rename to SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch index d039212..bb9455a 100644 --- a/SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +++ b/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch @@ -1,7 +1,7 @@ -From 48f45171b89b8ed24f2b2484d63b00ea7818b5c3 Mon Sep 17 00:00:00 2001 +From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001 From: Kfir Manor Date: Sun, 22 Jan 2023 17:33:07 +0200 -Subject: [PATCH 9/9] qga/linux: add usb support to guest-get-fsinfo +Subject: qga/linux: add usb support to guest-get-fsinfo RH-Author: Kostiantyn Kostiuk RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo @@ -16,15 +16,19 @@ Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.co Signed-off-by: Kfir Manor Reviewed-by: Konstantin Kostiuk Signed-off-by: Konstantin Kostiuk + +Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +Patch-id: 72 +Patch-present-in-specfile: True --- qga/commands-posix.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 
32493d6383..f1b2b87c13 100644 +index 079689d79a..97754930c1 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c -@@ -877,7 +877,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, +@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, g_str_equal(driver, "sym53c8xx") || g_str_equal(driver, "virtio-pci") || g_str_equal(driver, "ahci") || @@ -35,7 +39,7 @@ index 32493d6383..f1b2b87c13 100644 break; } -@@ -974,6 +976,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, +@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, } } else if (strcmp(driver, "nvme") == 0) { disk->bus_type = GUEST_DISK_BUS_TYPE_NVME; @@ -45,5 +49,5 @@ index 32493d6383..f1b2b87c13 100644 g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath); goto cleanup; -- -2.31.1 +2.39.1 diff --git a/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch b/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch new file mode 100644 index 0000000..ce0ba5c --- /dev/null +++ b/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch @@ -0,0 +1,110 @@ +From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 15 Feb 2023 02:03:17 -0500 +Subject: Add RHEL 9.2.0 compat structure + +Adding compatibility bits necessary to keep 9.2.0 machine +types same after rebase to 8.0. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase notes (8.0.0 rc4): +- Added migration.x-preempt-pre-7-2 compat) +--- + hw/arm/virt.c | 1 + + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 3 +++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 6 files changed, 20 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 1ae1654be5..9be53e9355 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init); + static void rhel920_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5aa567fad3..0e0120b7f2 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_2[] = { ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "e1000e", "migrate-timadj", "off" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "virtio-mem", "x-early-migration", "false" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "migration", "x-preempt-pre-7-2", "true" }, ++}; ++const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); ++ + /* + * Mostly the same as hw_compat_7_0 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 3e330fd36f..90fb6e2e03 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; + pcmc->enforce_amd_1tb_hole = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); + compat_props_add(m->compat_props, 
hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 98601bb76f..8945b69175 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) + m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.2.0"; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); + } + + DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index dcd3b966b0..6a0b93c63d 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine) + + static void ccw_machine_rhel920_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); + } + DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 5e7446ee40..5f08bd7550 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_2[]; ++extern const size_t hw_compat_rhel_9_2_len; ++ + extern GlobalProperty hw_compat_rhel_9_1[]; + extern const size_t hw_compat_rhel_9_1_len; + +-- +2.39.1 + diff --git a/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch b/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch deleted file mode 100644 index 001880b..0000000 --- a/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 9 
Nov 2022 07:08:32 -0500 -Subject: Addd 7.2 compat bits for RHEL 9.1 machine type - -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 9edec1ca05..3d851d34da 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "arm-gicv3-common", "force-8-bit-prio", "on" }, - /* hw_compat_rhel_9_1 from hw_compat_7_0 */ - { "nvme-ns", "eui64-default", "on"}, -+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */ -+ { "virtio-device", "queue_reset", "false" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch b/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch new file mode 100644 index 0000000..81993e9 --- /dev/null +++ b/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch @@ -0,0 +1,76 @@ +From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 Mar 2023 15:14:03 +0200 +Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU + 8.0.0 update + +Add pc_rhel_9_2_compat based on upstream pc_compat_7_2. 
+ +Signed-off-by: Thomas Huth +--- + hw/i386/pc.c | 6 ++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + include/hw/i386/pc.h | 3 +++ + 4 files changed, 13 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 8abb1f872e..f216922cee 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_2_compat[] = { ++ /* pc_rhel_9_2_compat from pc_compat_7_2 */ ++ { "ICH9-LPC", "noreboot", "true" }, ++}; ++const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); ++ + GlobalProperty pc_rhel_9_0_compat[] = { + /* pc_rhel_9_0_compat from pc_compat_6_2 */ + { "virtio-mem", "unplugged-inaccessible", "off" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 90fb6e2e03..fc704d783f 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_2, + hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 8945b69175..e97655616a 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) + + compat_props_add(m->compat_props, hw_compat_rhel_9_2, + hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 4376f64a47..d218ad1628 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -296,6 +296,9 @@ int 
pc_machine_kvm_type(MachineState *machine, const char *vm_type); + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_2_compat[]; ++extern const size_t pc_rhel_9_2_compat_len; ++ + extern GlobalProperty pc_rhel_9_0_compat[]; + extern const size_t pc_rhel_9_0_compat_len; + +-- +2.39.1 + diff --git a/SOURCES/0019-Disable-unwanted-new-devices.patch b/SOURCES/0019-Disable-unwanted-new-devices.patch new file mode 100644 index 0000000..f656ca9 --- /dev/null +++ b/SOURCES/0019-Disable-unwanted-new-devices.patch @@ -0,0 +1,83 @@ +From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 17 Apr 2023 01:24:18 -0400 +Subject: Disable unwanted new devices + +QEMU 8.0 adds two new device we do not want to support that can't +be disabled using configure switch. + +1) ide-cf - virtual CompactFlash card + +2) i2c-echo - testing echo device + +Use manual disabling of the device by changing code (1) and meson configs (2). 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/ide/qdev.c | 9 +++++++++ + hw/misc/meson.build | 3 ++- + 2 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c +index 1b3b4da01d..454bfa5783 100644 +--- a/hw/ide/qdev.c ++++ b/hw/ide/qdev.c +@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) + ide_dev_initfn(dev, IDE_CD, errp); + } + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static void ide_cf_realize(IDEDevice *dev, Error **errp) + { + ide_dev_initfn(dev, IDE_CFATA, errp); + } ++#endif + + #define DEFINE_IDE_DEV_PROPERTIES() \ + DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ +@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { + .class_init = ide_cd_class_init, + }; + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static Property ide_cf_properties[] = { + DEFINE_IDE_DEV_PROPERTIES(), + DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), +@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { + .instance_size = sizeof(IDEDrive), + .class_init = ide_cf_class_init, + }; ++#endif + + static void ide_device_class_init(ObjectClass *klass, void *data) + { +@@ -396,7 +402,10 @@ static void ide_register_types(void) + type_register_static(&ide_bus_info); + type_register_static(&ide_hd_info); + type_register_static(&ide_cd_info); ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + type_register_static(&ide_cf_info); ++#endif + type_register_static(&ide_device_type_info); + } + +diff --git a/hw/misc/meson.build b/hw/misc/meson.build +index a40245ad44..9cc5a61ed7 100644 +--- a/hw/misc/meson.build ++++ b/hw/misc/meson.build +@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c')) + + softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c')) + +-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) ++# Disabled for Red Hat Enterprise Linux ++# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) + + 
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c')) + +-- +2.39.1 + diff --git a/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch b/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch deleted file mode 100644 index 2642b30..0000000 --- a/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 17 Nov 2022 16:47:16 +0100 -Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585 -Upstream Status: n/a (rhel-only) - -Add the compatibility handling for the rebase from QEMU 7.1 to 7.2, -i.e. the settings from ccw_machine_7_1_class_options() and -ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type -(earlier settings have been added by previous rebases already). - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index ba640e3d9e..97e868ada0 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine) - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; - - s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); -+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); - } - - static void ccw_machine_rhel900_class_options(MachineClass *mc) - { -+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, -+ }; -+ -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - 
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); -+ s390mc->max_threads = S390_MAX_CPUS; - } - DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); - --- -2.31.1 - diff --git a/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch b/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch deleted file mode 100644 index cb69b93..0000000 --- a/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch +++ /dev/null @@ -1,43 +0,0 @@ -From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Wed, 23 Nov 2022 14:15:37 +0100 -Subject: redhat: aarch64: add rhel9.2.0 virt machine type - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982 -Upstream: RHEL only - -Signed-off-by: Cornelia Huck ---- - hw/arm/virt.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index dfcab40a73..0a94f31dd1 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - -+static void rhel920_virt_options(MachineClass *mc) -+{ -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) -+ - static void rhel900_virt_options(MachineClass *mc) - { - VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); - -+ rhel920_virt_options(mc); -+ - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; - } --DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) -+DEFINE_RHEL_MACHINE(9, 0, 0) --- -2.31.1 - diff --git a/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch b/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch deleted file mode 100644 index 144bd92..0000000 --- a/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch +++ 
/dev/null @@ -1,62 +0,0 @@ -From a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 17 Nov 2022 17:03:24 +0100 -Subject: redhat: Add new rhel-9.2.0 s390x machine type - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473 -Upstream Status: n/a (rhel-only) - -RHEL 9.2 will be an EUS release - we want to have a new machine -type here to make sure that we have a spot where we can wire up -fixes later. - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 97e868ada0..aa142a1a4e 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); - #endif - - -+static void ccw_machine_rhel920_instance_options(MachineState *machine) -+{ -+} -+ -+static void ccw_machine_rhel920_class_options(MachineClass *mc) -+{ -+} -+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); -+ - static void ccw_machine_rhel900_instance_options(MachineState *machine) - { - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; - -+ ccw_machine_rhel920_instance_options(machine); -+ - s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); - s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); - } -@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc) - { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, - }; - -+ ccw_machine_rhel920_class_options(mc); -+ - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); - s390mc->max_threads = S390_MAX_CPUS; - } --DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); -+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false); - - static void 
ccw_machine_rhel860_instance_options(MachineState *machine) - { --- -2.31.1 - diff --git a/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch b/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch deleted file mode 100644 index 8502b91..0000000 --- a/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch +++ /dev/null @@ -1,75 +0,0 @@ -From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 17 Nov 2022 12:36:30 +0000 -Subject: x86: rhel 9.2.0 machine type - -Add a 9.2.0 x86 machine type, and fix up the compatibility -for 9.0.0 and older. - -pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's -nothing to do there. - -Signed-off-by: Dr. David Alan Gilbert ---- - hw/i386/pc_piix.c | 1 + - hw/i386/pc_q35.c | 21 ++++++++++++++++++++- - 2 files changed, 21 insertions(+), 1 deletion(-) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 173a1fd10b..fc06877344 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m) - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; - pcmc->legacy_no_rng_seed = true; -+ pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 97c3630021..52cfe3bf45 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel920(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel920_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL"; -+ 
pcmc->smbios_stream_version = "9.2.0"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, -+ pc_q35_machine_rhel920_options); -+ - static void pc_q35_init_rhel900(MachineState *machine) - { - pc_q35_init(machine); -@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine) - static void pc_q35_machine_rhel900_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel_options(m); -+ pc_q35_machine_rhel920_options(m); - m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.0.0"; - pcmc->legacy_no_rng_seed = true; -+ pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, --- -2.31.1 - diff --git a/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch b/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch deleted file mode 100644 index b7aba7e..0000000 --- a/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:17:23 -0500 -Subject: [PATCH 30/31] KVM: keep track of running ioctls - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit a27dd2de68f37ba96fe164a42121daa5f0750afc -Author: Emanuele Giuseppe Esposito -Date: Fri Nov 11 10:47:57 2022 -0500 - - KVM: keep track of running ioctls - - Using the new accel-blocker API, mark where ioctls are being called - in KVM. 
Next, we will implement the critical section that will take - care of performing memslots modifications atomically, therefore - preventing any new ioctl from running and allowing the running ones - to finish. - - Signed-off-by: David Hildenbrand - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20221111154758.1372674-3-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/kvm/kvm-all.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index f99b0becd8..ff660fd469 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms) - assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size()); - - s->sigmask_len = 8; -+ accel_blocker_init(); - - #ifdef KVM_CAP_SET_GUEST_DEBUG - QTAILQ_INIT(&s->kvm_sw_breakpoints); -@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) - va_end(ap); - - trace_kvm_vm_ioctl(type, arg); -+ accel_ioctl_begin(); - ret = ioctl(s->vmfd, type, arg); -+ accel_ioctl_end(); - if (ret == -1) { - ret = -errno; - } -@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) - va_end(ap); - - trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg); -+ accel_cpu_ioctl_begin(cpu); - ret = ioctl(cpu->kvm_fd, type, arg); -+ accel_cpu_ioctl_end(cpu); - if (ret == -1) { - ret = -errno; - } -@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...) 
- va_end(ap); - - trace_kvm_device_ioctl(fd, type, arg); -+ accel_ioctl_begin(); - ret = ioctl(fd, type, arg); -+ accel_ioctl_end(); - if (ret == -1) { - ret = -errno; - } --- -2.31.1 - diff --git a/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch b/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch deleted file mode 100644 index 752aa08..0000000 --- a/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 19 Jan 2023 18:24:24 +0100 -Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in - vhost_user_read()" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 146: Fix vhost-user with dpdk -RH-Bugzilla: 2155173 -RH-Acked-by: Cindy Lu -RH-Acked-by: Greg Kurz (RH) -RH-Acked-by: Eugenio Pérez -RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos) - -This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692. - -The nested event loop is broken by design. It's only user was removed. -Drop the code as well so that nobody ever tries to use it again. - -I had to fix a couple of trivial conflicts around return values because -of 025faa872bcf ("vhost-user: stick to -errno error return convention"). - -Signed-off-by: Greg Kurz -Message-Id: <20230119172424.478268-3-groug@kaod.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Maxime Coquelin -(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-user.c | 65 ++++-------------------------------------- - 1 file changed, 5 insertions(+), 60 deletions(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 0ac00eb901..7cb49c50f9 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) - return 0; - } - --struct vhost_user_read_cb_data { -- struct vhost_dev *dev; -- VhostUserMsg *msg; -- GMainLoop *loop; -- int ret; --}; -- --static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, -- gpointer opaque) -+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - { -- struct vhost_user_read_cb_data *data = opaque; -- struct vhost_dev *dev = data->dev; -- VhostUserMsg *msg = data->msg; - struct vhost_user *u = dev->opaque; - CharBackend *chr = u->user->chr; - uint8_t *p = (uint8_t *) msg; -@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - - r = vhost_user_read_header(dev, msg); - if (r < 0) { -- data->ret = r; -- goto end; -+ return r; - } - - /* validate message size is sane */ -@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - error_report("Failed to read msg header." - " Size %d exceeds the maximum %zu.", msg->hdr.size, - VHOST_USER_PAYLOAD_SIZE); -- data->ret = -EPROTO; -- goto end; -+ return -EPROTO; - } - - if (msg->hdr.size) { -@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - int saved_errno = errno; - error_report("Failed to read msg payload." - " Read %d instead of %d.", r, msg->hdr.size); -- data->ret = r < 0 ? -saved_errno : -EIO; -- goto end; -+ return r < 0 ? 
-saved_errno : -EIO; - } - } - --end: -- g_main_loop_quit(data->loop); -- return G_SOURCE_REMOVE; --} -- --static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) --{ -- struct vhost_user *u = dev->opaque; -- CharBackend *chr = u->user->chr; -- GMainContext *prev_ctxt = chr->chr->gcontext; -- GMainContext *ctxt = g_main_context_new(); -- GMainLoop *loop = g_main_loop_new(ctxt, FALSE); -- struct vhost_user_read_cb_data data = { -- .dev = dev, -- .loop = loop, -- .msg = msg, -- .ret = 0 -- }; -- -- /* -- * We want to be able to monitor the slave channel fd while waiting -- * for chr I/O. This requires an event loop, but we can't nest the -- * one to which chr is currently attached : its fd handlers might not -- * be prepared for re-entrancy. So we create a new one and switch chr -- * to use it. -- */ -- qemu_chr_be_update_read_handlers(chr->chr, ctxt); -- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); -- -- g_main_loop_run(loop); -- -- /* -- * Restore the previous event loop context. This also destroys/recreates -- * event sources : this guarantees that all pending events in the original -- * context that have been processed by the nested loop are purged. 
-- */ -- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); -- -- g_main_loop_unref(loop); -- g_main_context_unref(ctxt); -- -- return data.ret; -+ return 0; - } - - static int process_message_reply(struct vhost_dev *dev, --- -2.31.1 - diff --git a/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch b/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch deleted file mode 100644 index 8e7b906..0000000 --- a/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 19 Jan 2023 18:24:23 +0100 -Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in - vhost_user_read()" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 146: Fix vhost-user with dpdk -RH-Bugzilla: 2155173 -RH-Acked-by: Cindy Lu -RH-Acked-by: Greg Kurz (RH) -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos) - -This reverts commit db8a3772e300c1a656331a92da0785d81667dc81. - -Motivation : this is breaking vhost-user with DPDK as reported in [0]. - -Received unexpected msg type. Expected 22 received 40 -Fail to update device iotlb -Received unexpected msg type. Expected 40 received 22 -Received unexpected msg type. Expected 22 received 11 -Fail to update device iotlb -Received unexpected msg type. Expected 11 received 22 -vhost VQ 1 ring restore failed: -71: Protocol error (71) -Received unexpected msg type. Expected 22 received 11 -Fail to update device iotlb -Received unexpected msg type. 
Expected 11 received 22 -vhost VQ 0 ring restore failed: -71: Protocol error (71) -unable to start vhost net: 71: falling back on userspace virtio - -The failing sequence that leads to the first error is : -- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master - socket -- QEMU starts a nested event loop in order to wait for the - VHOST_USER_GET_STATUS response and to be able to process messages from - the slave channel -- DPDK sends a couple of legitimate IOTLB miss messages on the slave - channel -- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22) - updates on the master socket -- QEMU assumes to receive a response for the latest VHOST_USER_IOTLB_MSG - but it gets the response for the VHOST_USER_GET_STATUS instead - -The subsequent errors have the same root cause : the nested event loop -breaks the order by design. It lures QEMU to expect responses to the -latest message sent on the master socket to arrive first. - -Since this was only needed for DAX enablement which is still not merged -upstream, just drop the code for now. A working solution will have to -be merged later on. Likely protect the master socket with a mutex -and service the slave channel with a separate thread, as discussed with -Maxime in the mail thread below. - -[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/ - -Reported-by: Yanghang Liu -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173 -Signed-off-by: Greg Kurz -Message-Id: <20230119172424.478268-2-groug@kaod.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Stefan Hajnoczi -Acked-by: Maxime Coquelin -(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-user.c | 35 +++-------------------------------- - 1 file changed, 3 insertions(+), 32 deletions(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 8f635844af..0ac00eb901 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -356,35 +356,6 @@ end: - return G_SOURCE_REMOVE; - } - --static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, -- gpointer opaque); -- --/* -- * This updates the read handler to use a new event loop context. -- * Event sources are removed from the previous context : this ensures -- * that events detected in the previous context are purged. They will -- * be re-detected and processed in the new context. -- */ --static void slave_update_read_handler(struct vhost_dev *dev, -- GMainContext *ctxt) --{ -- struct vhost_user *u = dev->opaque; -- -- if (!u->slave_ioc) { -- return; -- } -- -- if (u->slave_src) { -- g_source_destroy(u->slave_src); -- g_source_unref(u->slave_src); -- } -- -- u->slave_src = qio_channel_add_watch_source(u->slave_ioc, -- G_IO_IN | G_IO_HUP, -- slave_read, dev, NULL, -- ctxt); --} -- - static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - { - struct vhost_user *u = dev->opaque; -@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - * be prepared for re-entrancy. So we create a new one and switch chr - * to use it. - */ -- slave_update_read_handler(dev, ctxt); - qemu_chr_be_update_read_handlers(chr->chr, ctxt); - qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); - -@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - * context that have been processed by the nested loop are purged. 
- */ - qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); -- slave_update_read_handler(dev, NULL); - - g_main_loop_unref(loop); - g_main_context_unref(ctxt); -@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) - return -ECONNREFUSED; - } - u->slave_ioc = ioc; -- slave_update_read_handler(dev, NULL); -+ u->slave_src = qio_channel_add_watch_source(u->slave_ioc, -+ G_IO_IN | G_IO_HUP, -+ slave_read, dev, NULL, NULL); - - if (reply_supported) { - msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; --- -2.31.1 - diff --git a/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch b/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch deleted file mode 100644 index 29a8ac5..0000000 --- a/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch +++ /dev/null @@ -1,348 +0,0 @@ -From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:16:41 -0500 -Subject: [PATCH 29/31] accel: introduce accelerator blocker API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1 -Author: Emanuele Giuseppe Esposito -Date: Fri Nov 11 10:47:56 2022 -0500 - - accel: introduce accelerator blocker API - - This API allows the accelerators to prevent vcpus from issuing - new ioctls while execting a critical section marked with the - accel_ioctl_inhibit_begin/end functions. - - Note that all functions submitting ioctls must mark where the - ioctl is being called with accel_{cpu_}ioctl_begin/end(). 
- - This API requires the caller to always hold the BQL. - API documentation is in sysemu/accel-blocker.h - - Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt - (to minimize cache line bouncing) to keep avoid that new ioctls - run when the critical section starts, and a QemuEvent to wait - that all running ioctls finish. - - Signed-off-by: Emanuele Giuseppe Esposito - Reviewed-by: Philippe Mathieu-Daudé - Message-Id: <20221111154758.1372674-2-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Conflicts: - util/meson.build: "interval-tree.c" does not exist - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++ - accel/meson.build | 2 +- - hw/core/cpu-common.c | 2 + - include/hw/core/cpu.h | 3 + - include/sysemu/accel-blocker.h | 56 ++++++++++++ - util/meson.build | 2 +- - 6 files changed, 217 insertions(+), 2 deletions(-) - create mode 100644 accel/accel-blocker.c - create mode 100644 include/sysemu/accel-blocker.h - -diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c -new file mode 100644 -index 0000000000..1e7f423462 ---- /dev/null -+++ b/accel/accel-blocker.c -@@ -0,0 +1,154 @@ -+/* -+ * Lock to inhibit accelerator ioctls -+ * -+ * Copyright (c) 2022 Red Hat Inc. -+ * -+ * Author: Emanuele Giuseppe Esposito -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/thread.h" -+#include "qemu/main-loop.h" -+#include "hw/core/cpu.h" -+#include "sysemu/accel-blocker.h" -+ -+static QemuLockCnt accel_in_ioctl_lock; -+static QemuEvent accel_in_ioctl_event; -+ -+void accel_blocker_init(void) -+{ -+ qemu_lockcnt_init(&accel_in_ioctl_lock); -+ qemu_event_init(&accel_in_ioctl_event, false); -+} -+ -+void accel_ioctl_begin(void) -+{ -+ if (likely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ -+ qemu_lockcnt_inc(&accel_in_ioctl_lock); -+} -+ -+void accel_ioctl_end(void) -+{ -+ if (likely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ qemu_lockcnt_dec(&accel_in_ioctl_lock); -+ /* change event to SET. If event was BUSY, wake up all waiters */ -+ qemu_event_set(&accel_in_ioctl_event); -+} -+ -+void accel_cpu_ioctl_begin(CPUState *cpu) -+{ -+ if (unlikely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ -+ qemu_lockcnt_inc(&cpu->in_ioctl_lock); -+} -+ -+void accel_cpu_ioctl_end(CPUState *cpu) -+{ -+ if (unlikely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ qemu_lockcnt_dec(&cpu->in_ioctl_lock); -+ /* change event to SET. 
If event was BUSY, wake up all waiters */ -+ qemu_event_set(&accel_in_ioctl_event); -+} -+ -+static bool accel_has_to_wait(void) -+{ -+ CPUState *cpu; -+ bool needs_to_wait = false; -+ -+ CPU_FOREACH(cpu) { -+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) { -+ /* exit the ioctl, if vcpu is running it */ -+ qemu_cpu_kick(cpu); -+ needs_to_wait = true; -+ } -+ } -+ -+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock); -+} -+ -+void accel_ioctl_inhibit_begin(void) -+{ -+ CPUState *cpu; -+ -+ /* -+ * We allow to inhibit only when holding the BQL, so we can identify -+ * when an inhibitor wants to issue an ioctl easily. -+ */ -+ g_assert(qemu_mutex_iothread_locked()); -+ -+ /* Block further invocations of the ioctls outside the BQL. */ -+ CPU_FOREACH(cpu) { -+ qemu_lockcnt_lock(&cpu->in_ioctl_lock); -+ } -+ qemu_lockcnt_lock(&accel_in_ioctl_lock); -+ -+ /* Keep waiting until there are running ioctls */ -+ while (true) { -+ -+ /* Reset event to FREE. */ -+ qemu_event_reset(&accel_in_ioctl_event); -+ -+ if (accel_has_to_wait()) { -+ /* -+ * If event is still FREE, and there are ioctls still in progress, -+ * wait. -+ * -+ * If an ioctl finishes before qemu_event_wait(), it will change -+ * the event state to SET. This will prevent qemu_event_wait() from -+ * blocking, but it's not a problem because if other ioctls are -+ * still running the loop will iterate once more and reset the event -+ * status to FREE so that it can wait properly. -+ * -+ * If an ioctls finishes while qemu_event_wait() is blocking, then -+ * it will be waken up, but also here the while loop makes sure -+ * to re-enter the wait if there are other running ioctls. 
-+ */ -+ qemu_event_wait(&accel_in_ioctl_event); -+ } else { -+ /* No ioctl is running */ -+ return; -+ } -+ } -+} -+ -+void accel_ioctl_inhibit_end(void) -+{ -+ CPUState *cpu; -+ -+ qemu_lockcnt_unlock(&accel_in_ioctl_lock); -+ CPU_FOREACH(cpu) { -+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock); -+ } -+} -+ -diff --git a/accel/meson.build b/accel/meson.build -index 259c35c4c8..061332610f 100644 ---- a/accel/meson.build -+++ b/accel/meson.build -@@ -1,4 +1,4 @@ --specific_ss.add(files('accel-common.c')) -+specific_ss.add(files('accel-common.c', 'accel-blocker.c')) - softmmu_ss.add(files('accel-softmmu.c')) - user_ss.add(files('accel-user.c')) - -diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c -index f9fdd46b9d..8d6a4b1b65 100644 ---- a/hw/core/cpu-common.c -+++ b/hw/core/cpu-common.c -@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj) - cpu->nr_threads = 1; - - qemu_mutex_init(&cpu->work_mutex); -+ qemu_lockcnt_init(&cpu->in_ioctl_lock); - QSIMPLEQ_INIT(&cpu->work_list); - QTAILQ_INIT(&cpu->breakpoints); - QTAILQ_INIT(&cpu->watchpoints); -@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj) - { - CPUState *cpu = CPU(obj); - -+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock); - qemu_mutex_destroy(&cpu->work_mutex); - } - -diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 8830546121..2417597236 100644 ---- a/include/hw/core/cpu.h -+++ b/include/hw/core/cpu.h -@@ -398,6 +398,9 @@ struct CPUState { - uint32_t kvm_fetch_index; - uint64_t dirty_pages; - -+ /* Use by accel-block: CPU is executing an ioctl() */ -+ QemuLockCnt in_ioctl_lock; -+ - /* Used for events with 'vcpu' and *without* the 'disabled' properties */ - DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); - DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); -diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h -new file mode 100644 -index 0000000000..72020529ef ---- /dev/null -+++ b/include/sysemu/accel-blocker.h -@@ -0,0 
+1,56 @@ -+/* -+ * Accelerator blocking API, to prevent new ioctls from starting and wait the -+ * running ones finish. -+ * This mechanism differs from pause/resume_all_vcpus() in that it does not -+ * release the BQL. -+ * -+ * Copyright (c) 2022 Red Hat Inc. -+ * -+ * Author: Emanuele Giuseppe Esposito -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#ifndef ACCEL_BLOCKER_H -+#define ACCEL_BLOCKER_H -+ -+#include "qemu/osdep.h" -+#include "sysemu/cpus.h" -+ -+extern void accel_blocker_init(void); -+ -+/* -+ * accel_{cpu_}ioctl_begin/end: -+ * Mark when ioctl is about to run or just finished. -+ * -+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is -+ * called, preventing new ioctls to run. They will continue only after -+ * accel_ioctl_inibith_end(). -+ */ -+extern void accel_ioctl_begin(void); -+extern void accel_ioctl_end(void); -+extern void accel_cpu_ioctl_begin(CPUState *cpu); -+extern void accel_cpu_ioctl_end(CPUState *cpu); -+ -+/* -+ * accel_ioctl_inhibit_begin: start critical section -+ * -+ * This function makes sure that: -+ * 1) incoming accel_{cpu_}ioctl_begin() calls block -+ * 2) wait that all ioctls that were already running reach -+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. -+ * -+ * This allows the caller to access shared data or perform operations without -+ * worrying of concurrent vcpus accesses. -+ */ -+extern void accel_ioctl_inhibit_begin(void); -+ -+/* -+ * accel_ioctl_inhibit_end: end critical section started by -+ * accel_ioctl_inhibit_begin() -+ * -+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue. 
-+ */ -+extern void accel_ioctl_inhibit_end(void); -+ -+#endif /* ACCEL_BLOCKER_H */ -diff --git a/util/meson.build b/util/meson.build -index 25b9b61f98..85a5504c4d 100644 ---- a/util/meson.build -+++ b/util/meson.build -@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c')) - util_ss.add(files('yank.c')) - util_ss.add(files('int128.c')) - util_ss.add(files('memalign.c')) -+util_ss.add(files('lockcnt.c')) - - if have_user - util_ss.add(files('selfmap.c')) -@@ -71,7 +72,6 @@ endif - if have_block or have_ga - util_ss.add(files('aiocb.c', 'async.c')) - util_ss.add(files('base64.c')) -- util_ss.add(files('lockcnt.c')) - util_ss.add(files('main-loop.c')) - util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) - util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND']))) --- -2.31.1 - diff --git a/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch b/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch deleted file mode 100644 index 0680a26..0000000 --- a/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch +++ /dev/null @@ -1,58 +0,0 @@ -From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 3 Feb 2023 18:15:10 +0100 -Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page - -RH-Author: Eric Auger -RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page -RH-Bugzilla: 2165280 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Gavin Shan -RH-Acked-by: Shaoqin Huang -RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/2165280 -Upstream: yes -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041 -Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEV anymore - -After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization -before registration"), it looks the CPUJumpCache pointer 
can be NULL. -This causes a SIGSEV when running debug-wp-migration kvm unit test. - -At the first place it should be clarified why this TCG code is called -with KVM acceleration. This may hide another bug. - -Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration") -Signed-off-by: Eric Auger -Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com> -Signed-off-by: Richard Henderson -(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860) -Signed-off-by: Eric Auger ---- - accel/tcg/cputlb.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c -index 6f1c00682b..4244b0e4e3 100644 ---- a/accel/tcg/cputlb.c -+++ b/accel/tcg/cputlb.c -@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, - - static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) - { -- int i, i0 = tb_jmp_cache_hash_page(page_addr); - CPUJumpCache *jc = cpu->tb_jmp_cache; -+ int i, i0; - -+ if (unlikely(!jc)) { -+ return; -+ } -+ -+ i0 = tb_jmp_cache_hash_page(page_addr); - for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { - qatomic_set(&jc->array[i0 + i].tb, NULL); - } --- -2.31.1 - diff --git a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch b/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch index a4fb6b1..b937d27 100644 --- a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch +++ b/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch @@ -1,15 +1,16 @@ -From d110c11b5658df93533698fdb428455f5e770866 Mon Sep 17 00:00:00 2001 +From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 18 Apr 2023 11:04:49 +0200 -Subject: [PATCH] acpi: pcihp: allow repeating hot-unplug requests +Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests RH-Author: Igor Mammedov -RH-MergeRequest: 280: acpi: pcihp: allow repeating hot-unplug requests -RH-Bugzilla: 2203745 
+RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests +RH-Bugzilla: 2087047 RH-Acked-by: Ani Sinha -RH-Acked-by: MST RH-Acked-by: Julia Suvorova -RH-Commit: [1/1] e884ac48ebd43c3ebdbc65b01ce5ad75f4cb9284 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam) with Q35 using ACPI PCI hotplug by default, user's request to unplug device is ignored when it's issued before guest OS has been booted. @@ -58,10 +59,10 @@ Signed-off-by: Igor Mammedov 1 file changed, 10 insertions(+) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c -index 84d75e6b84..a2a3738b46 100644 +index dcfb779a7a..cdd6f775a1 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c -@@ -429,6 +429,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, +@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, * acpi_pcihp_eject_slot() when the operation is completed. */ pdev->qdev.pending_deleted_event = true; diff --git a/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch b/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch deleted file mode 100644 index 5ee3270..0000000 --- a/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 244e92fea388d2be9fe81a5c5912d92b8f599caa Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 23 Mar 2023 10:48:59 -0400 -Subject: [PATCH 1/2] aio-posix: fix race between epoll upgrade and - aio_set_fd_handler() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 292: aio-posix: fix race between epoll upgrade and aio_set_fd_handler() -RH-Bugzilla: 2211923 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Paolo Bonzini -RH-Commit: [1/1] 182471bac79fa2b2ae8a34087eb6c4ab1af786e1 - -If another thread calls aio_set_fd_handler() while the IOThread event -loop is upgrading from ppoll(2) to epoll(7) then we might miss 
new -AioHandlers. The epollfd will not monitor the new AioHandler's fd, -resulting in hangs. - -Take the AioHandler list lock while upgrading to epoll. This prevents -AioHandlers from changing while epoll is being set up. If we cannot lock -because we're in a nested event loop, then don't upgrade to epoll (it -will happen next time we're not in a nested call). - -The downside to taking the lock is that the aio_set_fd_handler() thread -has to wait until the epoll upgrade is finished, which involves many -epoll_ctl(2) system calls. However, this scenario is rare and I couldn't -think of another solution that is still simple. - -Reported-by: Qing Wang -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2090998 -Cc: Paolo Bonzini -Cc: Fam Zheng -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230323144859.1338495-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit e62da98527fa35fe5f532cded01a33edf9fbe7b2) -Signed-off-by: Stefan Hajnoczi ---- - util/fdmon-epoll.c | 25 ++++++++++++++++++------- - 1 file changed, 18 insertions(+), 7 deletions(-) - -diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c -index e11a8a022e..1683aa1105 100644 ---- a/util/fdmon-epoll.c -+++ b/util/fdmon-epoll.c -@@ -127,6 +127,8 @@ static bool fdmon_epoll_try_enable(AioContext *ctx) - - bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd) - { -+ bool ok; -+ - if (ctx->epollfd < 0) { - return false; - } -@@ -136,14 +138,23 @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd) - return false; - } - -- if (npfd >= EPOLL_ENABLE_THRESHOLD) { -- if (fdmon_epoll_try_enable(ctx)) { -- return true; -- } else { -- fdmon_epoll_disable(ctx); -- } -+ if (npfd < EPOLL_ENABLE_THRESHOLD) { -+ return false; -+ } -+ -+ /* The list must not change while we add fds to epoll */ -+ if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) { -+ return false; -+ } -+ -+ ok = fdmon_epoll_try_enable(ctx); -+ -+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock); -+ -+ 
if (!ok) { -+ fdmon_epoll_disable(ctx); - } -- return false; -+ return ok; - } - - void fdmon_epoll_setup(AioContext *ctx) --- -2.39.3 - diff --git a/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch b/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch deleted file mode 100644 index ee7e7f9..0000000 --- a/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch +++ /dev/null @@ -1,50 +0,0 @@ -From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit b532526a07ef3b903ead2e055fe6cc87b41057a3 -Author: Paolo Bonzini -Date: Fri Mar 3 11:03:52 2023 +0100 - - aio-wait: switch to smp_mb__after_rmw() - - The barrier comes after an atomic increment, so it is enough to use - smp_mb__after_rmw(); this avoids a double barrier on x86 systems. - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - include/block/aio-wait.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index dd9a7f6461..da13357bb8 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -85,7 +85,7 @@ extern AioWait global_aio_wait; - /* Increment wait_->num_waiters before evaluating cond. */ \ - qatomic_inc(&wait_->num_waiters); \ - /* Paired with smp_mb in aio_wait_kick(). 
*/ \ -- smp_mb(); \ -+ smp_mb__after_rmw(); \ - if (ctx_ && in_aio_context_home_thread(ctx_)) { \ - while ((cond)) { \ - aio_poll(ctx_, true); \ --- -2.39.1 - diff --git a/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch b/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch new file mode 100644 index 0000000..69505f8 --- /dev/null +++ b/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch @@ -0,0 +1,55 @@ +From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 50795ee051a342c681a9b45671c552fbd6274db8 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:13 2023 -0400 + + apic: disable reentrancy detection for apic-msi + + As the code is designed for re-entrant calls to apic-msi, mark apic-msi + as reentrancy-safe. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-9-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/intc/apic.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/intc/apic.c b/hw/intc/apic.c +index 20b5a94073..ac3d47d231 100644 +--- a/hw/intc/apic.c ++++ b/hw/intc/apic.c +@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) + memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", + APIC_SPACE_SIZE); + ++ /* ++ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can ++ * write back to apic-msi. As such mark the apic-msi region re-entrancy ++ * safe. 
++ */ ++ s->io_memory.disable_reentrancy_guard = true; ++ + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); + local_apics[s->id] = s; + +-- +2.39.3 + diff --git a/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch b/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch new file mode 100644 index 0000000..65ba3be --- /dev/null +++ b/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch @@ -0,0 +1,231 @@ +From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 9c86c97f12c060bf7484dd931f38634e166a81f0 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:07 2023 -0400 + + async: Add an optional reentrancy guard to the BH API + + Devices can pass their MemoryReentrancyGuard (from their DeviceState), + when creating new BHes. Then, the async API will toggle the guard + before/after calling the BH call-back. This prevents bh->mmio reentrancy + issues. 
+ + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-3-alxndr@bu.edu> + [thuth: Fix "line over 90 characters" checkpatch.pl error] + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + docs/devel/multiple-iothreads.txt | 7 +++++++ + include/block/aio.h | 18 ++++++++++++++++-- + include/qemu/main-loop.h | 7 +++++-- + tests/unit/ptimer-test-stubs.c | 3 ++- + util/async.c | 18 +++++++++++++++++- + util/main-loop.c | 6 ++++-- + util/trace-events | 1 + + 7 files changed, 52 insertions(+), 8 deletions(-) + +diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt +index 343120f2ef..a3e949f6b3 100644 +--- a/docs/devel/multiple-iothreads.txt ++++ b/docs/devel/multiple-iothreads.txt +@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: + * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier + * LEGACY timer_new_ms() - create a timer + * LEGACY qemu_bh_new() - create a BH ++ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard + * LEGACY qemu_aio_wait() - run an event loop iteration + + Since they implicitly work on the main loop they cannot be used in code that +@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): + * aio_set_event_notifier() - monitor an event notifier + * aio_timer_new() - create a timer + * aio_bh_new() - create a BH ++ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard + * aio_poll() - run an event loop iteration + ++The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" ++argument, which is used to check for and prevent re-entrancy problems. For ++BHs associated with devices, the reentrancy-guard is contained in the ++corresponding DeviceState and named "mem_reentrancy_guard". ++ + The AioContext can be obtained from the IOThread using + iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). 
+ Code that takes an AioContext argument works both in IOThreads or the main +diff --git a/include/block/aio.h b/include/block/aio.h +index 543717f294..db6f23c619 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -23,6 +23,8 @@ + #include "qemu/thread.h" + #include "qemu/timer.h" + #include "block/graph-lock.h" ++#include "hw/qdev-core.h" ++ + + typedef struct BlockAIOCB BlockAIOCB; + typedef void BlockCompletionFunc(void *opaque, int ret); +@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + * is opaque and must be allocated prior to its use. + * + * @name: A human-readable identifier for debugging purposes. ++ * @reentrancy_guard: A guard set when entering a cb to prevent ++ * device-reentrancy issues + */ + QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, +- const char *name); ++ const char *name, MemReentrancyGuard *reentrancy_guard); + + /** + * aio_bh_new: Allocate a new bottom half structure +@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + * string. + */ + #define aio_bh_new(ctx, cb, opaque) \ +- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) ++ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) ++ ++/** ++ * aio_bh_new_guarded: Allocate a new bottom half structure with a ++ * reentrancy_guard ++ * ++ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name ++ * string. ++ */ ++#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ ++ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) + + /** + * aio_notify: Force processing of pending events. 
+diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h +index b3e54e00bc..68e70e61aa 100644 +--- a/include/qemu/main-loop.h ++++ b/include/qemu/main-loop.h +@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); + + /* internal interfaces */ + ++#define qemu_bh_new_guarded(cb, opaque, guard) \ ++ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) + #define qemu_bh_new(cb, opaque) \ +- qemu_bh_new_full((cb), (opaque), (stringify(cb))) +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); ++ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard); + void qemu_bh_schedule_idle(QEMUBH *bh); + + enum { +diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c +index f2bfcede93..8c9407c560 100644 +--- a/tests/unit/ptimer-test-stubs.c ++++ b/tests/unit/ptimer-test-stubs.c +@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) + return deadline; + } + +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard) + { + QEMUBH *bh = g_new(QEMUBH, 1); + +diff --git a/util/async.c b/util/async.c +index 21016a1ac7..a9b528c370 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -65,6 +65,7 @@ struct QEMUBH { + void *opaque; + QSLIST_ENTRY(QEMUBH) next; + unsigned flags; ++ MemReentrancyGuard *reentrancy_guard; + }; + + /* Called concurrently from any thread */ +@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, + } + + QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, +- const char *name) ++ const char *name, MemReentrancyGuard *reentrancy_guard) + { + QEMUBH *bh; + bh = g_new(QEMUBH, 1); +@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc 
*cb, void *opaque, + .cb = cb, + .opaque = opaque, + .name = name, ++ .reentrancy_guard = reentrancy_guard, + }; + return bh; + } + + void aio_bh_call(QEMUBH *bh) + { ++ bool last_engaged_in_io = false; ++ ++ if (bh->reentrancy_guard) { ++ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; ++ if (bh->reentrancy_guard->engaged_in_io) { ++ trace_reentrant_aio(bh->ctx, bh->name); ++ } ++ bh->reentrancy_guard->engaged_in_io = true; ++ } ++ + bh->cb(bh->opaque); ++ ++ if (bh->reentrancy_guard) { ++ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; ++ } + } + + /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ +diff --git a/util/main-loop.c b/util/main-loop.c +index e180c85145..7022f02ef8 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) + + /* Functions to operate on the main QEMU AioContext. */ + +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard) + { +- return aio_bh_new_full(qemu_aio_context, cb, opaque, name); ++ return aio_bh_new_full(qemu_aio_context, cb, opaque, name, ++ reentrancy_guard); + } + + /* +diff --git a/util/trace-events b/util/trace-events +index 16f78d8fe5..3f7e766683 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" + # async.c + aio_co_schedule(void *ctx, void *co) "ctx %p co %p" + aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" ++reentrant_aio(void *ctx, const char *name) "ctx %p name %s" + + # thread-pool.c + thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" +-- +2.39.3 + diff --git a/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch b/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch new file mode 100644 index 0000000..df71fa2 --- /dev/null +++ 
b/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch @@ -0,0 +1,70 @@ +From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 7915bd06f25e1803778081161bf6fa10c42dc7cd +Author: Alexander Bulekov +Date: Mon May 1 10:19:56 2023 -0400 + + async: avoid use-after-free on re-entrancy guard + + A BH callback can free the BH, causing a use-after-free in aio_bh_call. + Fix that by keeping a local copy of the re-entrancy guard pointer. + + Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513 + Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API") + Signed-off-by: Alexander Bulekov + Message-Id: <20230501141956.3444868-1-alxndr@bu.edu> + Reviewed-by: Thomas Huth + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + util/async.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/util/async.c b/util/async.c +index a9b528c370..cd1a1815f9 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh) + { + bool last_engaged_in_io = false; + +- if (bh->reentrancy_guard) { +- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; +- if (bh->reentrancy_guard->engaged_in_io) { ++ /* Make a copy of the guard-pointer as cb may free the bh */ ++ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard; ++ if (reentrancy_guard) { ++ last_engaged_in_io = reentrancy_guard->engaged_in_io; ++ if (reentrancy_guard->engaged_in_io) { + trace_reentrant_aio(bh->ctx, bh->name); + } +- 
bh->reentrancy_guard->engaged_in_io = true; ++ reentrancy_guard->engaged_in_io = true; + } + + bh->cb(bh->opaque); + +- if (bh->reentrancy_guard) { +- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; ++ if (reentrancy_guard) { ++ reentrancy_guard->engaged_in_io = last_engaged_in_io; + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch b/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch deleted file mode 100644 index 0e4a48d..0000000 --- a/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 6229438cca037d42f44a96d38feb15cb102a444f -Author: Paolo Bonzini -Date: Mon Mar 6 10:43:52 2023 +0100 - - async: clarify usage of barriers in the polling case - - Explain that aio_context_notifier_poll() relies on - aio_notify_accept() to catch all the memory writes that were - done before ctx->notified was set to true. 
- - Reviewed-by: Richard Henderson - Reviewed-by: Stefan Hajnoczi - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/async.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/util/async.c b/util/async.c -index 37d3e6036d..e0846baf93 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx) - qatomic_set(&ctx->notified, false); - - /* -- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb -- * in aio_notify. -+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the -+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs -+ * with smp_wmb() in aio_notify. - */ - smp_mb(); - } -@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque) - EventNotifier *e = opaque; - AioContext *ctx = container_of(e, AioContext, notifier); - -+ /* -+ * No need for load-acquire because we just want to kick the -+ * event loop. aio_notify_accept() takes care of synchronizing -+ * the event loop with the producers. 
-+ */ - return qatomic_read(&ctx->notified); - } - --- -2.39.1 - diff --git a/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch b/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch deleted file mode 100644 index cb92dc9..0000000 --- a/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 11/12] async: update documentation of the memory barriers - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 8dd48650b43dfde4ebea34191ac267e474bcc29e -Author: Paolo Bonzini -Date: Mon Mar 6 10:15:06 2023 +0100 - - async: update documentation of the memory barriers - - Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)", - 2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll() - is happening when the bottom half is enqueued in the bh_list; not - when the flags are set. Update the documentation to match. - - Reviewed-by: Stefan Hajnoczi - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/async.c | 33 +++++++++++++++++++-------------- - 1 file changed, 19 insertions(+), 14 deletions(-) - -diff --git a/util/async.c b/util/async.c -index 63434ddae4..37d3e6036d 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) - unsigned old_flags; - - /* -- * The memory barrier implicit in qatomic_fetch_or makes sure that: -- * 1. 
idle & any writes needed by the callback are done before the -- * locations are read in the aio_bh_poll. -- * 2. ctx is loaded before the callback has a chance to execute and bh -- * could be freed. -+ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that -+ * insertion starts after BH_PENDING is set. - */ - old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags); -+ - if (!(old_flags & BH_PENDING)) { -+ /* -+ * At this point the bottom half becomes visible to aio_bh_poll(). -+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in -+ * aio_bh_poll(), ensuring that: -+ * 1. any writes needed by the callback are visible from the callback -+ * after aio_bh_dequeue() returns bh. -+ * 2. ctx is loaded before the callback has a chance to execute and bh -+ * could be freed. -+ */ - QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); - } - -@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) - QSLIST_REMOVE_HEAD(head, next); - - /* -- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory -- * barrier ensures that the callback sees all writes done by the scheduling -- * thread. It also ensures that the scheduling thread sees the cleared -- * flag before bh->cb has run, and thus will call aio_notify again if -- * necessary. -+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that -+ * the removal finishes before BH_PENDING is reset. - */ - *flags = qatomic_fetch_and(&bh->flags, - ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); -@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx) - BHListSlice *s; - int ret = 0; - -+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ - QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); - QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); - -@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx) - void aio_notify(AioContext *ctx) - { - /* -- * Write e.g. bh->flags before writing ctx->notified. 
Pairs with smp_mb in -- * aio_notify_accept. -+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with -+ * smp_mb() in aio_notify_accept(). - */ - smp_wmb(); - qatomic_set(&ctx->notified, true); - - /* -- * Write ctx->notified before reading ctx->notify_me. Pairs -- * with smp_mb in aio_ctx_prepare or aio_poll. -+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me. -+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll. - */ - smp_mb(); - if (qatomic_read(&ctx->notify_me)) { --- -2.39.1 - diff --git a/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch b/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch new file mode 100644 index 0000000..6d9abb8 --- /dev/null +++ b/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch @@ -0,0 +1,57 @@ +From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for + iomem + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:11 2023 -0400 + + bcm2835_property: disable reentrancy detection for iomem + + As the code is designed for re-entrant calls from bcm2835_property to + bcm2835_mbox and back into bcm2835_property, mark iomem as + reentrancy-safe. 
+ + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-7-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/misc/bcm2835_property.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c +index 890ae7bae5..de056ea2df 100644 +--- a/hw/misc/bcm2835_property.c ++++ b/hw/misc/bcm2835_property.c +@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) + + memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, + TYPE_BCM2835_PROPERTY, 0x10); ++ ++ /* ++ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from ++ * iomem. As such, mark iomem as re-entracy safe. ++ */ ++ s->iomem.disable_reentrancy_guard = true; ++ + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); + sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch b/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch deleted file mode 100644 index 04f1dda..0000000 --- a/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch +++ /dev/null @@ -1,250 +0,0 @@ -From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:06 +0100 -Subject: [PATCH 24/31] block: Call drain callbacks only once - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s) - -We only need to call both the BlockDriver's callback and the parent -callbacks when going from undrained to drained or vice versa. A second -drain section doesn't make a difference for the driver or the parent, -they weren't supposed to send new requests before and after the second -drain. 
- -One thing that gets in the way is the 'ignore_bds_parents' parameter in -bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that -bdrv_drain_all_begin() increases bs->quiesce_counter, but does not -quiesce the parent through BdrvChildClass callbacks. If an additional -drain section is started now, bs->quiesce_counter will be non-zero, but -we would still need to quiesce the parent through BdrvChildClass in -order to keep things consistent (and unquiesce it on the matching -bdrv_drained_end(), even though the counter would not reach 0 yet as -long as the bdrv_drain_all() section is still active). - -Instead of keeping track of this, let's just get rid of the parameter. -It was introduced in commit 6cd5c9d7b2d as an optimisation so that -during bdrv_drain_all(), we wouldn't recursively drain all parents up to -the root for each node, resulting in quadratic complexity. As it happens, -calling the callbacks only once solves the same problem, so as of this -patch, we'll still have O(n) complexity and ignore_bds_parents is not -needed any more. - -This patch only ignores the 'ignore_bds_parents' parameter. It will be -removed in a separate patch. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-12-kwolf@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1) -Signed-off-by: Stefano Garzarella ---- - block.c | 25 +++++++------------------ - block/io.c | 30 ++++++++++++++++++------------ - include/block/block_int-common.h | 8 ++++---- - tests/unit/test-bdrv-drain.c | 16 ++++++++++------ - 4 files changed, 39 insertions(+), 40 deletions(-) - -diff --git a/block.c b/block.c -index e0e3b21790..5a583e260d 100644 ---- a/block.c -+++ b/block.c -@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - { - BlockDriverState *old_bs = child->bs; - int new_bs_quiesce_counter; -- int drain_saldo; - - assert(!child->frozen); - assert(old_bs != new_bs); -@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); - } - -- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; -- - /* - * If the new child node is drained but the old one was not, flush - * all outstanding requests to the old child node. - */ -- while (drain_saldo > 0 && child->klass->drained_begin) { -+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -+ if (new_bs_quiesce_counter && !child->quiesced_parent) { - bdrv_parent_drained_begin_single(child, true); -- drain_saldo--; - } - - if (old_bs) { -@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - if (new_bs) { - assert_bdrv_graph_writable(new_bs); - QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); -- -- /* -- * Polling in bdrv_parent_drained_begin_single() may have led to the new -- * node's quiesce_counter having been decreased. 
Not a problem, we just -- * need to recognize this here and then invoke drained_end appropriately -- * more often. -- */ -- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); -- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; -- - if (child->klass->attach) { - child->klass->attach(child); - } -@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - /* - * If the old child node was drained but the new one is not, allow - * requests to come in only after the new node has been attached. -+ * -+ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() -+ * polls, which could have changed the value. - */ -- while (drain_saldo < 0 && child->klass->drained_end) { -+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -+ if (!new_bs_quiesce_counter && child->quiesced_parent) { - bdrv_parent_drained_end_single(child); -- drain_saldo++; - } - } - -diff --git a/block/io.c b/block/io.c -index 75224480d0..87d6f22ec4 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c) - { - IO_OR_GS_CODE(); - -- assert(c->parent_quiesce_counter > 0); -- c->parent_quiesce_counter--; -+ assert(c->quiesced_parent); -+ c->quiesced_parent = false; -+ - if (c->klass->drained_end) { - c->klass->drained_end(c); - } -@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) - { - AioContext *ctx = bdrv_child_get_parent_aio_context(c); - IO_OR_GS_CODE(); -- c->parent_quiesce_counter++; -+ -+ assert(!c->quiesced_parent); -+ c->quiesced_parent = true; -+ - if (c->klass->drained_begin) { - c->klass->drained_begin(c); - } -@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { - aio_disable_external(bdrv_get_aio_context(bs)); -- } - -- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- if (bs->drv && 
bs->drv->bdrv_drain_begin) { -- bs->drv->bdrv_drain_begin(bs); -+ /* TODO Remove ignore_bds_parents, we don't consider it any more */ -+ bdrv_parent_drained_begin(bs, parent, false); -+ if (bs->drv && bs->drv->bdrv_drain_begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } - } - } - -@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- if (bs->drv && bs->drv->bdrv_drain_end) { -- bs->drv->bdrv_drain_end(bs); -- } -- bdrv_parent_drained_end(bs, parent, ignore_bds_parents); -- - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); - if (old_quiesce_counter == 1) { -+ if (bs->drv && bs->drv->bdrv_drain_end) { -+ bs->drv->bdrv_drain_end(bs); -+ } -+ /* TODO Remove ignore_bds_parents, we don't consider it any more */ -+ bdrv_parent_drained_end(bs, parent, false); -+ - aio_enable_external(bdrv_get_aio_context(bs)); - } - } -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 791dddfd7d..a6bc6b7fe9 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -980,13 +980,13 @@ struct BdrvChild { - bool frozen; - - /* -- * How many times the parent of this child has been drained -+ * True if the parent of this child has been drained by this BdrvChild - * (through klass->drained_*). -- * Usually, this is equal to bs->quiesce_counter (potentially -- * reduced by bdrv_drain_all_count). It may differ while the -+ * -+ * It is generally true if bs->quiesce_counter > 0. It may differ while the - * child is entering or leaving a drained section. 
- */ -- int parent_quiesce_counter; -+ bool quiesced_parent; - - QLIST_ENTRY(BdrvChild) next; - QLIST_ENTRY(BdrvChild) next_parent; -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index dda08de8db..172bc6debc 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) - - do_drain_begin(drain_type, bs); - -- g_assert_cmpint(bs->quiesce_counter, ==, 1); -+ if (drain_type == BDRV_DRAIN_ALL) { -+ g_assert_cmpint(bs->quiesce_counter, ==, 2); -+ } else { -+ g_assert_cmpint(bs->quiesce_counter, ==, 1); -+ } - g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); - - do_drain_end(drain_type, bs); -@@ -348,8 +352,8 @@ static void test_nested(void) - - for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { - for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { -- int backing_quiesce = (outer != BDRV_DRAIN) + -- (inner != BDRV_DRAIN); -+ int backing_quiesce = (outer == BDRV_DRAIN_ALL) + -+ (inner == BDRV_DRAIN_ALL); - - g_assert_cmpint(bs->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); -@@ -359,10 +363,10 @@ static void test_nested(void) - do_drain_begin(outer, bs); - do_drain_begin(inner, bs); - -- g_assert_cmpint(bs->quiesce_counter, ==, 2); -+ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce); - g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); -- g_assert_cmpint(s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); -+ g_assert_cmpint(s->drain_count, ==, 1); -+ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce); - - do_drain_end(inner, bs); - do_drain_end(outer, bs); --- -2.31.1 - diff --git a/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch b/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch new file mode 100644 index 0000000..6de5d65 --- /dev/null +++ 
b/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch @@ -0,0 +1,354 @@ +From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:16 +0200 +Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s) + +When processing vectored guest requests that are not aligned to the +storage request alignment, we pad them by adding head and/or tail +buffers for a read-modify-write cycle. + +The guest can submit I/O vectors up to IOV_MAX (1024) in length, but +with this padding, the vector can exceed that limit. As of +4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make +qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the +limit, instead returning an error to the guest. + +To the guest, this appears as a random I/O error. We should not return +an I/O error to the guest when it issued a perfectly valid request. + +Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector +longer than IOV_MAX, which generally seems to work (because the guest +assumes a smaller alignment than we really have, file-posix's +raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and +so emulate the request, so that the IOV_MAX does not matter). However, +that does not seem exactly great. + +I see two ways to fix this problem: +1. We split such long requests into two requests. +2. We join some elements of the vector into new buffers to make it + shorter. + +I am wary of (1), because it seems like it may have unintended side +effects. + +(2) on the other hand seems relatively simple to implement, with +hopefully few side effects, so this patch does that. 
+ +To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request() +is effectively replaced by the new function bdrv_create_padded_qiov(), +which not only wraps the request IOV with padding head/tail, but also +ensures that the resulting vector will not have more than IOV_MAX +elements. Putting that functionality into qemu_iovec_init_extended() is +infeasible because it requires allocating a bounce buffer; doing so +would require many more parameters (buffer alignment, how to initialize +the buffer, and out parameters like the buffer, its length, and the +original elements), which is not reasonable. + +Conversely, it is not difficult to move qemu_iovec_init_extended()'s +functionality into bdrv_create_padded_qiov() by using public +qemu_iovec_* functions, so that is what this patch does. + +Because bdrv_pad_request() was the only "serious" user of +qemu_iovec_init_extended(), the next patch will remove the latter +function, so the functionality is not implemented twice. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964 +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-3-hreitz@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a) +Signed-off-by: Hanna Czenczek +--- + block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 151 insertions(+), 15 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 2e267a85ab..4e8e90208b 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1439,6 +1439,14 @@ out: + * @merge_reads is true for small requests, + * if @buf_len == @head + bytes + @tail. In this case it is possible that both + * head and tail exist but @buf_len == align and @tail_buf == @buf. ++ * ++ * @write is true for write requests, false for read requests. ++ * ++ * If padding makes the vector too long (exceeding IOV_MAX), then we need to ++ * merge existing vector elements into a single one. 
@collapse_bounce_buf acts ++ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse ++ * I/O vector elements so for read requests, the data can be copied back after ++ * the read is done. + */ + typedef struct BdrvRequestPadding { + uint8_t *buf; +@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding { + size_t head; + size_t tail; + bool merge_reads; ++ bool write; + QEMUIOVector local_qiov; ++ ++ uint8_t *collapse_bounce_buf; ++ size_t collapse_len; ++ QEMUIOVector pre_collapse_qiov; + } BdrvRequestPadding; + + static bool bdrv_init_padding(BlockDriverState *bs, + int64_t offset, int64_t bytes, ++ bool write, + BdrvRequestPadding *pad) + { + int64_t align = bs->bl.request_alignment; +@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs, + pad->tail_buf = pad->buf + pad->buf_len - align; + } + ++ pad->write = write; ++ + return true; + } + +@@ -1547,8 +1563,23 @@ zero_mem: + return 0; + } + +-static void bdrv_padding_destroy(BdrvRequestPadding *pad) ++/** ++ * Free *pad's associated buffers, and perform any necessary finalization steps. ++ */ ++static void bdrv_padding_finalize(BdrvRequestPadding *pad) + { ++ if (pad->collapse_bounce_buf) { ++ if (!pad->write) { ++ /* ++ * If padding required elements in the vector to be collapsed into a ++ * bounce buffer, copy the bounce buffer content back ++ */ ++ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ qemu_vfree(pad->collapse_bounce_buf); ++ qemu_iovec_destroy(&pad->pre_collapse_qiov); ++ } + if (pad->buf) { + qemu_vfree(pad->buf); + qemu_iovec_destroy(&pad->local_qiov); +@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + memset(pad, 0, sizeof(*pad)); + } + ++/* ++ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while ++ * ensuring that the resulting vector will not exceed IOV_MAX elements. 
++ * ++ * To ensure this, when necessary, the first two or three elements of @iov are ++ * merged into pad->collapse_bounce_buf and replaced by a reference to that ++ * bounce buffer in pad->local_qiov. ++ * ++ * After performing a read request, the data from the bounce buffer must be ++ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()). ++ */ ++static int bdrv_create_padded_qiov(BlockDriverState *bs, ++ BdrvRequestPadding *pad, ++ struct iovec *iov, int niov, ++ size_t iov_offset, size_t bytes) ++{ ++ int padded_niov, surplus_count, collapse_count; ++ ++ /* Assert this invariant */ ++ assert(niov <= IOV_MAX); ++ ++ /* ++ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error ++ * to the guest is not ideal, but there is little else we can do. At least ++ * this will practically never happen on 64-bit systems. ++ */ ++ if (SIZE_MAX - pad->head < bytes || ++ SIZE_MAX - pad->head - bytes < pad->tail) ++ { ++ return -EINVAL; ++ } ++ ++ /* Length of the resulting IOV if we just concatenated everything */ ++ padded_niov = !!pad->head + niov + !!pad->tail; ++ ++ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX)); ++ ++ if (pad->head) { ++ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head); ++ } ++ ++ /* ++ * If padded_niov > IOV_MAX, we cannot just concatenate everything. ++ * Instead, merge the first two or three elements of @iov to reduce the ++ * number of vector elements as necessary. ++ */ ++ if (padded_niov > IOV_MAX) { ++ /* ++ * Only head and tail can have lead to the number of entries exceeding ++ * IOV_MAX, so we can exceed it by the head and tail at most. We need ++ * to reduce the number of elements by `surplus_count`, so we merge that ++ * many elements plus one into one element. 
++ */ ++ surplus_count = padded_niov - IOV_MAX; ++ assert(surplus_count <= !!pad->head + !!pad->tail); ++ collapse_count = surplus_count + 1; ++ ++ /* ++ * Move the elements to collapse into `pad->pre_collapse_qiov`, then ++ * advance `iov` (and associated variables) by those elements. ++ */ ++ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count); ++ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov, ++ collapse_count, iov_offset, SIZE_MAX); ++ iov += collapse_count; ++ iov_offset = 0; ++ niov -= collapse_count; ++ bytes -= pad->pre_collapse_qiov.size; ++ ++ /* ++ * Construct the bounce buffer to match the length of the to-collapse ++ * vector elements, and for write requests, initialize it with the data ++ * from those elements. Then add it to `pad->local_qiov`. ++ */ ++ pad->collapse_len = pad->pre_collapse_qiov.size; ++ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len); ++ if (pad->write) { ++ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ qemu_iovec_add(&pad->local_qiov, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ ++ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes); ++ ++ if (pad->tail) { ++ qemu_iovec_add(&pad->local_qiov, ++ pad->buf + pad->buf_len - pad->tail, pad->tail); ++ } ++ ++ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX)); ++ return 0; ++} ++ + /* + * bdrv_pad_request + * +@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + * read of padding, bdrv_padding_rmw_read() should be called separately if + * needed. + * ++ * @write is true for write requests, false for read requests. 
++ * + * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out: + * - on function start they represent original request + * - on failure or when padding is not needed they are unchanged +@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + static int bdrv_pad_request(BlockDriverState *bs, + QEMUIOVector **qiov, size_t *qiov_offset, + int64_t *offset, int64_t *bytes, ++ bool write, + BdrvRequestPadding *pad, bool *padded, + BdrvRequestFlags *flags) + { + int ret; ++ struct iovec *sliced_iov; ++ int sliced_niov; ++ size_t sliced_head, sliced_tail; + + bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); + +- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { ++ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { + if (padded) { + *padded = false; + } + return 0; + } + +- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, +- *qiov, *qiov_offset, *bytes, +- pad->buf + pad->buf_len - pad->tail, +- pad->tail); ++ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, ++ &sliced_head, &sliced_tail, ++ &sliced_niov); ++ ++ /* Guaranteed by bdrv_check_qiov_request() */ ++ assert(*bytes <= SIZE_MAX); ++ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, ++ sliced_head, *bytes); + if (ret < 0) { +- bdrv_padding_destroy(pad); ++ bdrv_padding_finalize(pad); + return ret; + } + *bytes += pad->head + pad->tail; +@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + flags |= BDRV_REQ_COPY_ON_READ; + } + +- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, +- NULL, &flags); ++ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false, ++ &pad, NULL, &flags); + if (ret < 0) { + goto fail; + } +@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + bs->bl.request_alignment, + qiov, qiov_offset, flags); + tracked_request_end(&req); +- bdrv_padding_destroy(&pad); ++ 
bdrv_padding_finalize(&pad); + + fail: + bdrv_dec_in_flight(bs); +@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, + /* This flag doesn't make sense for padding or zero writes */ + flags &= ~BDRV_REQ_REGISTERED_BUF; + +- padding = bdrv_init_padding(bs, offset, bytes, &pad); ++ padding = bdrv_init_padding(bs, offset, bytes, true, &pad); + if (padding) { + assert(!(flags & BDRV_REQ_NO_WAIT)); + bdrv_make_request_serialising(req, align); +@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, + } + + out: +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + return ret; + } +@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do + * alignment only if there is no ZERO flag. + */ +- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, +- &padded, &flags); ++ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true, ++ &pad, &padded, &flags); + if (ret < 0) { + return ret; + } +@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, + qiov, qiov_offset, flags); + +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + out: + tracked_request_end(&req); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch b/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch new file mode 100644 index 0000000..fbab82d --- /dev/null +++ b/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch @@ -0,0 +1,56 @@ +From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 4 May 2023 13:57:34 +0200 +Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in + qmp_block_resize() + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() 
+RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm) + +This QMP handler runs in a coroutine, so it must use the corresponding +no_co_wrappers instead. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688 +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-Id: <20230504115750.54437-5-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888) +Signed-off-by: Kevin Wolf +--- + blockdev.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index d7b5c18f0a..eb509cf964 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, + return; + } + +- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); ++ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); + if (!blk) { + return; + } +@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, + + bdrv_co_lock(bs); + bdrv_drained_end(bs); +- blk_unref(blk); ++ blk_co_unref(blk); + bdrv_co_unlock(bs); + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch b/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch deleted file mode 100644 index 80018cc..0000000 --- a/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch +++ /dev/null @@ -1,298 +0,0 @@ -From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:09 +0100 -Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during 
snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s) - -In order to make sure that bdrv_replace_child_noperm() doesn't have to -poll any more, get rid of the bdrv_parent_drained_begin_single() call. - -This is possible now because we can require that the parent is already -drained through the child in question when the function is called and we -don't call the parent drain callbacks more than once. - -The additional drain calls needed in callers cause the test case to run -its code in the drain handler too early (bdrv_attach_child() drains -now), so modify it to only enable the code after the test setup has -completed. - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-15-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4) -Signed-off-by: Stefano Garzarella ---- - block.c | 103 ++++++++++++++++++++++++++++++----- - block/io.c | 2 +- - include/block/block-io.h | 8 +++ - tests/unit/test-bdrv-drain.c | 10 ++++ - 4 files changed, 108 insertions(+), 15 deletions(-) - -diff --git a/block.c b/block.c -index af31a94863..65588d313a 100644 ---- a/block.c -+++ b/block.c -@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque) - - GLOBAL_STATE_CODE(); - /* old_bs reference is transparently moved from @s to @s->child */ -+ if (!s->child->bs) { -+ /* -+ * The parents were undrained when removing old_bs from the child. New -+ * requests can't have been made, though, because the child was empty. -+ * -+ * TODO Make bdrv_replace_child_noperm() transactionable to avoid -+ * undraining the parent in the first place. Once this is done, having -+ * new_bs drained when calling bdrv_replace_child_tran() is not a -+ * requirement any more. 
-+ */ -+ bdrv_parent_drained_begin_single(s->child, false); -+ assert(!bdrv_parent_drained_poll_single(s->child)); -+ } -+ assert(s->child->quiesced_parent); - bdrv_replace_child_noperm(s->child, s->old_bs); - bdrv_unref(new_bs); - } -@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = { - * - * Note: real unref of old_bs is done only on commit. - * -+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be -+ * kept drained until the transaction is completed. -+ * - * The function doesn't update permissions, caller is responsible for this. - */ - static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, - Transaction *tran) - { - BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); -+ -+ assert(child->quiesced_parent); -+ assert(!new_bs || new_bs->quiesce_counter); -+ - *s = (BdrvReplaceChildState) { - .child = child, - .old_bs = child->bs, -@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) - return permissions[qapi_perm]; - } - -+/* -+ * Replaces the node that a BdrvChild points to without updating permissions. -+ * -+ * If @new_bs is non-NULL, the parent of @child must already be drained through -+ * @child. -+ * -+ * This function does not poll. -+ */ - static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs) - { -@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - int new_bs_quiesce_counter; - - assert(!child->frozen); -+ -+ /* -+ * If we want to change the BdrvChild to point to a drained node as its new -+ * child->bs, we need to make sure that its new parent is drained, too. 
In -+ * other words, either child->quiesce_parent must already be true or we must -+ * be able to set it and keep the parent's quiesce_counter consistent with -+ * that, but without polling or starting new requests (this function -+ * guarantees that it doesn't poll, and starting new requests would be -+ * against the invariants of drain sections). -+ * -+ * To keep things simple, we pick the first option (child->quiesce_parent -+ * must already be true). We also generalise the rule a bit to make it -+ * easier to verify in callers and more likely to be covered in test cases: -+ * The parent must be quiesced through this child even if new_bs isn't -+ * currently drained. -+ * -+ * The only exception is for callers that always pass new_bs == NULL. In -+ * this case, we obviously never need to consider the case of a drained -+ * new_bs, so we can keep the callers simpler by allowing them not to drain -+ * the parent. -+ */ -+ assert(!new_bs || child->quiesced_parent); - assert(old_bs != new_bs); - GLOBAL_STATE_CODE(); - -@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); - } - -- /* -- * If the new child node is drained but the old one was not, flush -- * all outstanding requests to the old child node. -- */ -- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -- if (new_bs_quiesce_counter && !child->quiesced_parent) { -- bdrv_parent_drained_begin_single(child, true); -- } -- - if (old_bs) { - if (child->klass->detach) { - child->klass->detach(child); -@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - } - - /* -- * If the old child node was drained but the new one is not, allow -- * requests to come in only after the new node has been attached. -- * -- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() -- * polls, which could have changed the value. 
-+ * If the parent was drained through this BdrvChild previously, but new_bs -+ * is not drained, allow requests to come in only after the new node has -+ * been attached. - */ - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); - if (!new_bs_quiesce_counter && child->quiesced_parent) { -@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, - } - - bdrv_ref(child_bs); -+ /* -+ * Let every new BdrvChild start with a drained parent. Inserting the child -+ * in the graph with bdrv_replace_child_noperm() will undrain it if -+ * @child_bs is not drained. -+ * -+ * The child was only just created and is not yet visible in global state -+ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody -+ * could have sent requests and polling is not necessary. -+ * -+ * Note that this means that the parent isn't fully drained yet, we only -+ * stop new requests from coming in. This is fine, we don't care about the -+ * old requests here, they are not for this child. If another place enters a -+ * drain section for the same parent, but wants it to be fully quiesced, it -+ * will not run most of the the code in .drained_begin() again (which is not -+ * a problem, we already did this), but it will still poll until the parent -+ * is fully quiesced, so it will not be negatively affected either. 
-+ */ -+ bdrv_parent_drained_begin_single(new_child, false); - bdrv_replace_child_noperm(new_child, child_bs); - - BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); -@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) - } - - if (child->bs) { -+ BlockDriverState *bs = child->bs; -+ bdrv_drained_begin(bs); - bdrv_replace_child_tran(child, NULL, tran); -+ bdrv_drained_end(bs); - } - - tran_add(tran, &bdrv_remove_child_drv, child); - } - -+static void undrain_on_clean_cb(void *opaque) -+{ -+ bdrv_drained_end(opaque); -+} -+ -+static TransactionActionDrv undrain_on_clean = { -+ .clean = undrain_on_clean_cb, -+}; -+ - static int bdrv_replace_node_noperm(BlockDriverState *from, - BlockDriverState *to, - bool auto_skip, Transaction *tran, -@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, - - GLOBAL_STATE_CODE(); - -+ bdrv_drained_begin(from); -+ bdrv_drained_begin(to); -+ tran_add(tran, &undrain_on_clean, from); -+ tran_add(tran, &undrain_on_clean, to); -+ - QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { - assert(c->bs == from); - if (!should_update_child(c, to)) { -diff --git a/block/io.c b/block/io.c -index 5e9150d92c..ae64830eac 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) - } - } - --static bool bdrv_parent_drained_poll_single(BdrvChild *c) -+bool bdrv_parent_drained_poll_single(BdrvChild *c) - { - if (c->klass->drained_poll) { - return c->klass->drained_poll(c); -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 8f5e75756a..65e6d2569b 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); - */ - void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - -+/** -+ * bdrv_parent_drained_poll_single: -+ * -+ * Returns true if there is 
any pending activity to cease before @c can be -+ * called quiesced, false otherwise. -+ */ -+bool bdrv_parent_drained_poll_single(BdrvChild *c); -+ - /** - * bdrv_parent_drained_end_single: - * -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 172bc6debc..2686a8acee 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void) - - - typedef struct BDRVReplaceTestState { -+ bool setup_completed; - bool was_drained; - bool was_undrained; - bool has_read; -@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -+ if (!s->setup_completed) { -+ return; -+ } -+ - if (!s->drain_count) { - s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); - bdrv_inc_in_flight(bs); -@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -+ if (!s->setup_completed) { -+ return; -+ } -+ - g_assert(s->drain_count > 0); - if (!--s->drain_count) { - s->was_undrained = true; -@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count, - bdrv_ref(old_child_bs); - bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, - BDRV_CHILD_COW, &error_abort); -+ parent_s->setup_completed = true; - - for (i = 0; i < old_drain_count; i++) { - bdrv_drained_begin(old_child_bs); --- -2.31.1 - diff --git a/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch b/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch deleted file mode 100644 index e3bf1e2..0000000 --- a/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:03 +0100 -Subject: [PATCH 21/31] block: Don't use subtree drains in - 
bdrv_drop_intermediate() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s) - -Instead of using a subtree drain from the top node (which also drains -child nodes of base that we're not even interested in), use a normal -drain for base, which automatically drains all of the parents, too. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-9-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index cb5e96b1cf..b3449a312e 100644 ---- a/block.c -+++ b/block.c -@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - GLOBAL_STATE_CODE(); - - bdrv_ref(top); -- bdrv_subtree_drained_begin(top); -+ bdrv_drained_begin(base); - - if (!top->drv || !base->drv) { - goto exit; -@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - - ret = 0; - exit: -- bdrv_subtree_drained_end(top); -+ bdrv_drained_end(base); - bdrv_unref(top); - return ret; - } --- -2.31.1 - diff --git a/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch b/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch deleted file mode 100644 index 24661fb..0000000 --- a/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch +++ /dev/null @@ -1,157 +0,0 @@ -From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:02 +0100 -Subject: [PATCH 20/31] block: Drain individual nodes during reopen - 
-RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s) - -bdrv_reopen() and friends use subtree drains as a lazy way of covering -all the nodes they touch. Turns out that this lazy way is a lot more -complicated than just draining the nodes individually, even not -accounting for the additional complexity in the drain mechanism itself. - -Simplify the code by switching to draining the individual nodes that are -already managed in the BlockReopenQueue anyway. - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-8-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8) -Signed-off-by: Stefano Garzarella ---- - block.c | 16 +++++++++------- - block/replication.c | 6 ------ - blockdev.c | 13 ------------- - 3 files changed, 9 insertions(+), 26 deletions(-) - -diff --git a/block.c b/block.c -index 46df410b07..cb5e96b1cf 100644 ---- a/block.c -+++ b/block.c -@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - * returns a pointer to bs_queue, which is either the newly allocated - * bs_queue, or the existing bs_queue being used. - * -- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). -+ * bs is drained here and undrained by bdrv_reopen_queue_free(). - * - * To be called with bs->aio_context locked. - */ -@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - int flags; - QemuOpts *opts; - -- /* Make sure that the caller remembered to use a drained section. 
This is -- * important to avoid graph changes between the recursive queuing here and -- * bdrv_reopen_multiple(). */ -- assert(bs->quiesce_counter > 0); - GLOBAL_STATE_CODE(); - -+ bdrv_drained_begin(bs); -+ - if (bs_queue == NULL) { - bs_queue = g_new0(BlockReopenQueue, 1); - QTAILQ_INIT(bs_queue); -@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) - if (bs_queue) { - BlockReopenQueueEntry *bs_entry, *next; - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { -+ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); -+ -+ aio_context_acquire(ctx); -+ bdrv_drained_end(bs_entry->state.bs); -+ aio_context_release(ctx); -+ - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); - g_free(bs_entry); -@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - - GLOBAL_STATE_CODE(); - -- bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); - - if (ctx != qemu_get_aio_context()) { -@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - if (ctx != qemu_get_aio_context()) { - aio_context_acquire(ctx); - } -- bdrv_subtree_drained_end(bs); - - return ret; - } -diff --git a/block/replication.c b/block/replication.c -index f1eed25e43..c62f48a874 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, - s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); - } - -- bdrv_subtree_drained_begin(hidden_disk->bs); -- bdrv_subtree_drained_begin(secondary_disk->bs); -- - if (s->orig_hidden_read_only) { - QDict *opts = qdict_new(); - qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); -@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, - aio_context_acquire(ctx); - } - } -- -- bdrv_subtree_drained_end(hidden_disk->bs); -- 
bdrv_subtree_drained_end(secondary_disk->bs); - } - - static void backup_job_cleanup(BlockDriverState *bs) -diff --git a/blockdev.c b/blockdev.c -index 3f1dec6242..8ffb3d9537 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3547,8 +3547,6 @@ fail: - void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - { - BlockReopenQueue *queue = NULL; -- GSList *drained = NULL; -- GSList *p; - - /* Add each one of the BDS that we want to reopen to the queue */ - for (; reopen_list != NULL; reopen_list = reopen_list->next) { -@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - ctx = bdrv_get_aio_context(bs); - aio_context_acquire(ctx); - -- bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(queue, bs, qdict, false); -- drained = g_slist_prepend(drained, bs); - - aio_context_release(ctx); - } -@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - - fail: - bdrv_reopen_queue_free(queue); -- for (p = drained; p; p = p->next) { -- BlockDriverState *bs = p->data; -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- aio_context_acquire(ctx); -- bdrv_subtree_drained_end(bs); -- aio_context_release(ctx); -- } -- g_slist_free(drained); - } - - void qmp_blockdev_del(const char *node_name, Error **errp) --- -2.31.1 - diff --git a/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch b/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch deleted file mode 100644 index 1ae73c7..0000000 --- a/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:08 +0100 -Subject: [PATCH 26/31] block: Drop out of coroutine in - bdrv_do_drained_begin_quiesce() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 
2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s) - -The next patch adds a parent drain to bdrv_attach_child_common(), which -shouldn't be, but is currently called from coroutines in some cases (e.g. -.bdrv_co_create implementations generally open new nodes). Therefore, -the assertion that we're not in a coroutine doesn't hold true any more. - -We could just remove the assertion because there is nothing in the -function that should be in conflict with running in a coroutine, but -just to be on the safe side, we can reverse the caller relationship -between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so -that the latter also just drops out of coroutine context and we can -still be certain in the future that any drain code doesn't run in -coroutines. - -As a nice side effect, the structure of bdrv_do_drained_begin() is now -symmetrical with bdrv_do_drained_end(). 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-14-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553) -Signed-off-by: Stefano Garzarella ---- - block/io.c | 25 ++++++++++++------------- - 1 file changed, 12 insertions(+), 13 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 2e9503df6a..5e9150d92c 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - } - } - --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool poll) - { - IO_OR_GS_CODE(); -- assert(!qemu_in_coroutine()); -+ -+ if (qemu_in_coroutine()) { -+ bdrv_co_yield_to_drain(bs, true, parent, poll); -+ return; -+ } - - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { -@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) - bs->drv->bdrv_drain_begin(bs); - } - } --} -- --static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool poll) --{ -- if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, parent, poll); -- return; -- } -- -- bdrv_do_drained_begin_quiesce(bs, parent); - - /* - * Wait for drained requests to finish. 
-@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - } - } - -+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) -+{ -+ bdrv_do_drained_begin(bs, parent, false); -+} -+ - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); --- -2.31.1 - diff --git a/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch b/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch deleted file mode 100644 index b73b8fe..0000000 --- a/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch +++ /dev/null @@ -1,67 +0,0 @@ -From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:01 +0100 -Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s) - -Callers don't agree whether bdrv_reopen_queue_child() should be called -with the AioContext lock held or not. Standardise on holding the lock -(as done by QMP blockdev-reopen and the replication block driver) and -fix bdrv_reopen() to do the same. - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-7-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814) -Signed-off-by: Stefano Garzarella ---- - block.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index 7999fd08c5..46df410b07 100644 ---- a/block.c -+++ b/block.c -@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - * bs_queue, or the existing bs_queue being used. 
- * - * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). -+ * -+ * To be called with bs->aio_context locked. - */ - static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - BlockDriverState *bs, -@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - return bs_queue; - } - -+/* To be called with bs->aio_context locked */ - BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState *bs, - QDict *options, bool keep_old_opts) -@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - GLOBAL_STATE_CODE(); - - bdrv_subtree_drained_begin(bs); -+ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); -+ - if (ctx != qemu_get_aio_context()) { - aio_context_release(ctx); - } -- -- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); - ret = bdrv_reopen_multiple(queue, errp); - - if (ctx != qemu_get_aio_context()) { --- -2.31.1 - diff --git a/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch b/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch new file mode 100644 index 0000000..c0ab8c2 --- /dev/null +++ b/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch @@ -0,0 +1,73 @@ +From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 14 Jul 2023 10:59:38 +0200 +Subject: [PATCH 5/9] block: Fix pad_request's request restriction + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s) + +bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX, +which bdrv_check_qiov_request() does not guarantee. 
+ +bdrv_check_request32() however will guarantee this, and both of +bdrv_pad_request()'s callers (bdrv_co_preadv_part() and +bdrv_co_pwritev_part()) already run it before calling +bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call +bdrv_check_request32() without expecting error, too. + +In effect, this patch will not change guest-visible behavior. It is a +clean-up to tighten a condition to match what is guaranteed by our +callers, and which exists purely to show clearly why the subsequent +assertion (`assert(*bytes <= SIZE_MAX)`) is always true. + +Note there is a difference between the interfaces of +bdrv_check_qiov_request() and bdrv_check_request32(): The former takes +an errp, the latter does not, so we can no longer just pass +&error_abort. Instead, we need to check the returned value. While we +do expect success (because the callers have already run this function), +an assert(ret == 0) is not much simpler than just to return an error if +it occurs, so let us handle errors by returning them up the stack now. 
+ +Reported-by: Peter Maydell +Signed-off-by: Hanna Czenczek +Message-id: 20230714085938.202730-1-hreitz@redhat.com +Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a + ("block: Collapse padded I/O vecs exceeding IOV_MAX") +Signed-off-by: Hanna Czenczek +Signed-off-by: Stefan Hajnoczi +--- + block/io.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 4e8e90208b..807c9fb720 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs, + int sliced_niov; + size_t sliced_head, sliced_tail; + +- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); ++ /* Should have been checked by the caller already */ ++ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset); ++ if (ret < 0) { ++ return ret; ++ } + + if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { + if (padded) { +@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs, + &sliced_head, &sliced_tail, + &sliced_niov); + +- /* Guaranteed by bdrv_check_qiov_request() */ ++ /* Guaranteed by bdrv_check_request32() */ + assert(*bytes <= SIZE_MAX); + ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, + sliced_head, *bytes); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch b/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch deleted file mode 100644 index 5b54210..0000000 --- a/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:53 +0200 -Subject: [PATCH 06/20] block: Improve empty format-specific info dump - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: 
[1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s) - -When a block driver supports obtaining format-specific information, but -that object only contains optional fields, it is possible that none of -them are present, so that dump_qobject() (called by -bdrv_image_info_specific_dump()) will not print anything. - -The callers of bdrv_image_info_specific_dump() put a header above this -information ("Format specific information:\n"), which will look strange -when there is nothing below. Modify bdrv_image_info_specific_dump() to -print this header instead of its callers, and only if there is indeed -something to be printed. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-2-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++---- - include/block/qapi.h | 3 ++- - qemu-io-cmds.c | 4 ++-- - 3 files changed, 41 insertions(+), 7 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index cf557e3aea..51202b470a 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict) - } - } - --void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) -+/* -+ * Return whether dumping the given QObject with dump_qobject() would -+ * yield an empty dump, i.e. not print anything. -+ */ -+static bool qobject_is_empty_dump(const QObject *obj) -+{ -+ switch (qobject_type(obj)) { -+ case QTYPE_QNUM: -+ case QTYPE_QSTRING: -+ case QTYPE_QBOOL: -+ return false; -+ -+ case QTYPE_QDICT: -+ return qdict_size(qobject_to(QDict, obj)) == 0; -+ -+ case QTYPE_QLIST: -+ return qlist_empty(qobject_to(QList, obj)); -+ -+ default: -+ abort(); -+ } -+} -+ -+/** -+ * Dumps the given ImageInfoSpecific object in a human-readable form, -+ * prepending an optional prefix if the dump is not empty. 
-+ */ -+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -+ const char *prefix) - { - QObject *obj, *data; - Visitor *v = qobject_output_visitor_new(&obj); -@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) - visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort); - visit_complete(v, &obj); - data = qdict_get(qobject_to(QDict, obj), "data"); -- dump_qobject(1, data); -+ if (!qobject_is_empty_dump(data)) { -+ if (prefix) { -+ qemu_printf("%s", prefix); -+ } -+ dump_qobject(1, data); -+ } - qobject_unref(obj); - visit_free(v); - } -@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info) - } - - if (info->has_format_specific) { -- qemu_printf("Format specific information:\n"); -- bdrv_image_info_specific_dump(info->format_specific); -+ bdrv_image_info_specific_dump(info->format_specific, -+ "Format specific information:\n"); - } - } -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 22c7807c89..c09859ea78 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs, - Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); --void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec); -+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -+ const char *prefix); - void bdrv_image_info_dump(ImageInfo *info); - #endif -diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c -index 952dc940f1..f4a374528e 100644 ---- a/qemu-io-cmds.c -+++ b/qemu-io-cmds.c -@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) - return -EIO; - } - if (spec_info) { -- printf("Format specific information:\n"); -- bdrv_image_info_specific_dump(spec_info); -+ bdrv_image_info_specific_dump(spec_info, -+ "Format specific information:\n"); - qapi_free_ImageInfoSpecific(spec_info); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch 
b/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch deleted file mode 100644 index 07160dc..0000000 --- a/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:00 +0100 -Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s) - -bdrv_drain_invoke() has now two entirely separate cases that share no -code any more and are selected depending on a bool parameter. Each case -has only one caller. Just inline the function. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-6-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121) -Signed-off-by: Stefano Garzarella ---- - block/io.c | 23 ++++++----------------- - 1 file changed, 6 insertions(+), 17 deletions(-) - -diff --git a/block/io.c b/block/io.c -index f4ca62b034..a25103be6f 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -242,21 +242,6 @@ typedef struct { - bool ignore_bds_parents; - } BdrvCoDrainData; - --/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ --static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) --{ -- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || -- (!begin && !bs->drv->bdrv_drain_end)) { -- return; -- } -- -- if (begin) { -- bs->drv->bdrv_drain_begin(bs); -- } else { -- bs->drv->bdrv_drain_end(bs); -- } --} -- - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ - 
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents) -@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- bdrv_drain_invoke(bs, true); -+ if (bs->drv && bs->drv->bdrv_drain_begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- bdrv_drain_invoke(bs, false); -+ if (bs->drv && bs->drv->bdrv_drain_end) { -+ bs->drv->bdrv_drain_end(bs); -+ } - bdrv_parent_drained_end(bs, parent, ignore_bds_parents); - - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); --- -2.31.1 - diff --git a/SOURCES/kvm-block-Remove-drained_end_counter.patch b/SOURCES/kvm-block-Remove-drained_end_counter.patch deleted file mode 100644 index cfafc33..0000000 --- a/SOURCES/kvm-block-Remove-drained_end_counter.patch +++ /dev/null @@ -1,433 +0,0 @@ -From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:59 +0100 -Subject: [PATCH 17/31] block: Remove drained_end_counter - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s) - -drained_end_counter is unused now, nobody changes its value any more. It -can be removed. - -In cases where we had two almost identical functions that only differed -in whether the caller passes drained_end_counter, or whether they would -poll for a local drained_end_counter to reach 0, these become a single -function. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Message-Id: <20221118174110.55183-5-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f) -Signed-off-by: Stefano Garzarella ---- - block.c | 5 +- - block/block-backend.c | 4 +- - block/io.c | 98 ++++++++------------------------ - blockjob.c | 2 +- - include/block/block-io.h | 24 -------- - include/block/block_int-common.h | 6 +- - 6 files changed, 30 insertions(+), 109 deletions(-) - -diff --git a/block.c b/block.c -index 16a62a329c..7999fd08c5 100644 ---- a/block.c -+++ b/block.c -@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) - return bdrv_drain_poll(bs, false, NULL, false); - } - --static void bdrv_child_cb_drained_end(BdrvChild *child, -- int *drained_end_counter) -+static void bdrv_child_cb_drained_end(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- bdrv_drained_end_no_poll(bs, drained_end_counter); -+ bdrv_drained_end(bs); - } - - static int bdrv_child_cb_inactivate(BdrvChild *child) -diff --git a/block/block-backend.c b/block/block-backend.c -index d98a96ff37..feaf2181fa 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, - } - static void blk_root_drained_begin(BdrvChild *child); - static bool blk_root_drained_poll(BdrvChild *child); --static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); -+static void blk_root_drained_end(BdrvChild *child); - - static void blk_root_change_media(BdrvChild *child, bool load); - static void blk_root_resize(BdrvChild *child); -@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child) - return busy || !!blk->in_flight; - } - --static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) -+static void 
blk_root_drained_end(BdrvChild *child) - { - BlockBackend *blk = child->opaque; - assert(blk->quiesce_counter); -diff --git a/block/io.c b/block/io.c -index c2ed4b2af9..f4ca62b034 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, - } - } - --static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, -- int *drained_end_counter) -+void bdrv_parent_drained_end_single(BdrvChild *c) - { -+ IO_OR_GS_CODE(); -+ - assert(c->parent_quiesce_counter > 0); - c->parent_quiesce_counter--; - if (c->klass->drained_end) { -- c->klass->drained_end(c, drained_end_counter); -+ c->klass->drained_end(c); - } - } - --void bdrv_parent_drained_end_single(BdrvChild *c) --{ -- int drained_end_counter = 0; -- AioContext *ctx = bdrv_child_get_parent_aio_context(c); -- IO_OR_GS_CODE(); -- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); -- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0); --} -- - static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents, -- int *drained_end_counter) -+ bool ignore_bds_parents) - { - BdrvChild *c; - -@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, - if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { - continue; - } -- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); -+ bdrv_parent_drained_end_single(c); - } - } - -@@ -249,12 +240,10 @@ typedef struct { - bool poll; - BdrvChild *parent; - bool ignore_bds_parents; -- int *drained_end_counter; - } BdrvCoDrainData; - - /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ --static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, -- int *drained_end_counter) -+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) - { - if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || - (!begin && !bs->drv->bdrv_drain_end)) { -@@ -305,8 +294,7 @@ 
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - bool poll); - static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- int *drained_end_counter); -+ BdrvChild *parent, bool ignore_bds_parents); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- assert(!data->drained_end_counter); - bdrv_do_drained_begin(bs, data->recursive, data->parent, - data->ignore_bds_parents, data->poll); - } else { - assert(!data->poll); - bdrv_do_drained_end(bs, data->recursive, data->parent, -- data->ignore_bds_parents, -- data->drained_end_counter); -+ data->ignore_bds_parents); - } - aio_context_release(ctx); - } else { -@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin, bool recursive, - BdrvChild *parent, - bool ignore_bds_parents, -- bool poll, -- int *drained_end_counter) -+ bool poll) - { - BdrvCoDrainData data; - Coroutine *self = qemu_coroutine_self(); -@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .parent = parent, - .ignore_bds_parents = ignore_bds_parents, - .poll = poll, -- .drained_end_counter = drained_end_counter, - }; - - if (bs) { -@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- bdrv_drain_invoke(bs, true, NULL); -+ bdrv_drain_invoke(bs, true); - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, -- poll, NULL); -+ poll); - return; - } - -@@ -461,38 +445,24 @@ void 
bdrv_subtree_drained_begin(BlockDriverState *bs) - - /** - * This function does not poll, nor must any of its recursively called -- * functions. The *drained_end_counter pointee will be incremented -- * once for every background operation scheduled, and decremented once -- * the operation settles. Therefore, the pointer must remain valid -- * until the pointee reaches 0. That implies that whoever sets up the -- * pointee has to poll until it is 0. -- * -- * We use atomic operations to access *drained_end_counter, because -- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of -- * @bs may contain nodes in different AioContexts, -- * (2) bdrv_drain_all_end() uses the same counter for all nodes, -- * regardless of which AioContext they are in. -+ * functions. - */ - static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- int *drained_end_counter) -+ BdrvChild *parent, bool ignore_bds_parents) - { - BdrvChild *child; - int old_quiesce_counter; - -- assert(drained_end_counter != NULL); -- - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, -- false, drained_end_counter); -+ false); - return; - } - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- bdrv_drain_invoke(bs, false, drained_end_counter); -- bdrv_parent_drained_end(bs, parent, ignore_bds_parents, -- drained_end_counter); -+ bdrv_drain_invoke(bs, false); -+ bdrv_parent_drained_end(bs, parent, ignore_bds_parents); - - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); - if (old_quiesce_counter == 1) { -@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - assert(!ignore_bds_parents); - bs->recursive_quiesce_counter--; - QLIST_FOREACH(child, &bs->children, next) { -- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, -- drained_end_counter); -+ bdrv_do_drained_end(child->bs, true, 
child, ignore_bds_parents); - } - } - } - - void bdrv_drained_end(BlockDriverState *bs) - { -- int drained_end_counter = 0; - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); --} -- --void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) --{ -- IO_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, false); - } - - void bdrv_subtree_drained_end(BlockDriverState *bs) - { -- int drained_end_counter = 0; - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); -+ bdrv_do_drained_end(bs, true, NULL, false); - } - - void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) -@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) - - void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) - { -- int drained_end_counter = 0; - int i; - IO_OR_GS_CODE(); - - for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_end(child->bs, true, child, false, -- &drained_end_counter); -+ bdrv_do_drained_end(child->bs, true, child, false); - } -- -- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); -+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); - return; - } - -@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void) - - void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - { -- int drained_end_counter = 0; - GLOBAL_STATE_CODE(); - - g_assert(bs->quiesce_counter > 0); - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- 
bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, true); - } -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); - } - - void bdrv_drain_all_end(void) - { - BlockDriverState *bs = NULL; -- int drained_end_counter = 0; - GLOBAL_STATE_CODE(); - - /* -@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, true); - aio_context_release(aio_context); - } - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); -- - assert(bdrv_drain_all_count > 0); - bdrv_drain_all_count--; - } -diff --git a/blockjob.c b/blockjob.c -index f51d4e18f3..0ab721e139 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c) - } - } - --static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) -+static void child_job_drained_end(BdrvChild *c) - { - BlockJob *job = c->opaque; - job_resume(&job->job); -diff --git a/include/block/block-io.h b/include/block/block-io.h -index b099d7db45..054e964c9b 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, - int64_t bytes, BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - --/** -- * bdrv_drained_end_no_poll: -- * -- * Same as bdrv_drained_end(), but do not poll for the subgraph to -- * actually become unquiesced. Therefore, no graph changes will occur -- * with this function. -- * -- * *drained_end_counter is incremented for every background operation -- * that is scheduled, and will be decremented for every operation once -- * it settles. The caller must poll until it reaches 0. 
The counter -- * should be accessed using atomic operations only. -- */ --void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); -- -- - /* - * "I/O or GS" API functions. These functions can run without - * the BQL, but only in one specific iothread/main loop. -@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - * bdrv_parent_drained_end_single: - * - * End a quiesced section for the parent of @c. -- * -- * This polls @bs's AioContext until all scheduled sub-drained_ends -- * have settled, which may result in graph changes. - */ - void bdrv_parent_drained_end_single(BdrvChild *c); - -@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); - * bdrv_drained_end: - * - * End a quiescent section started by bdrv_drained_begin(). -- * -- * This polls @bs's AioContext until all scheduled sub-drained_ends -- * have settled. On one hand, that may result in graph changes. On -- * the other, this requires that the caller either runs in the main -- * loop; or that all involved nodes (@bs and all of its parents) are -- * in the caller's AioContext. - */ - void bdrv_drained_end(BlockDriverState *bs); - -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 40d646d1ed..2b97576f6d 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -939,15 +939,11 @@ struct BdrvChildClass { - * These functions must not change the graph (and therefore also must not - * call aio_poll(), which could change the graph indirectly). - * -- * If drained_end() schedules background operations, it must atomically -- * increment *drained_end_counter for each such operation and atomically -- * decrement it once the operation has settled. -- * - * Note that this can be nested. If drained_begin() was called twice, new - * I/O is allowed only after drained_end() was called twice, too. 
- */ - void (*drained_begin)(BdrvChild *child); -- void (*drained_end)(BdrvChild *child, int *drained_end_counter); -+ void (*drained_end)(BdrvChild *child); - - /* - * Returns whether the parent has pending requests for the child. This --- -2.31.1 - diff --git a/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch b/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch deleted file mode 100644 index aa64bec..0000000 --- a/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch +++ /dev/null @@ -1,274 +0,0 @@ -From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:07 +0100 -Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from - drain_begin/end. - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s) - -ignore_bds_parents is now ignored during drain_begin and drain_end, so -we can just remove it there. It is still a valid optimisation for -drain_all in bdrv_drained_poll(), so leave it around there. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-13-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270) -Signed-off-by: Stefano Garzarella ---- - block.c | 2 +- - block/io.c | 58 +++++++++++++++------------------------- - include/block/block-io.h | 3 +-- - 3 files changed, 24 insertions(+), 39 deletions(-) - -diff --git a/block.c b/block.c -index 5a583e260d..af31a94863 100644 ---- a/block.c -+++ b/block.c -@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) - static void bdrv_child_cb_drained_begin(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- bdrv_do_drained_begin_quiesce(bs, NULL, false); -+ bdrv_do_drained_begin_quiesce(bs, NULL); - } - - static bool bdrv_child_cb_drained_poll(BdrvChild *child) -diff --git a/block/io.c b/block/io.c -index 87d6f22ec4..2e9503df6a 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); - static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int64_t bytes, BdrvRequestFlags flags); - --static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents) -+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) - { - BdrvChild *c, *next; - - QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { -- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { -+ if (c == ignore) { - continue; - } - bdrv_parent_drained_begin_single(c, false); -@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c) - } - } - --static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents) -+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) - { - BdrvChild *c; - - QLIST_FOREACH(c, &bs->parents, next_parent) { -- if (c 
== ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { -+ if (c == ignore) { - continue; - } - bdrv_parent_drained_end_single(c); -@@ -242,7 +240,6 @@ typedef struct { - bool begin; - bool poll; - BdrvChild *parent; -- bool ignore_bds_parents; - } BdrvCoDrainData; - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents, bool poll); --static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents); -+ bool poll); -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, -- data->poll); -+ bdrv_do_drained_begin(bs, data->parent, data->poll); - } else { - assert(!data->poll); -- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); -+ bdrv_do_drained_end(bs, data->parent); - } - aio_context_release(ctx); - } else { -@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) - static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin, - BdrvChild *parent, -- bool ignore_bds_parents, - bool poll) - { - BdrvCoDrainData data; -@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .done = false, - .begin = begin, - .parent = parent, -- .ignore_bds_parents = ignore_bds_parents, - .poll = poll, - }; - -@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - } - } - --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, -- BdrvChild *parent, bool ignore_bds_parents) -+void bdrv_do_drained_begin_quiesce(BlockDriverState 
*bs, BdrvChild *parent) - { - IO_OR_GS_CODE(); - assert(!qemu_in_coroutine()); -@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { - aio_disable_external(bdrv_get_aio_context(bs)); -- -- /* TODO Remove ignore_bds_parents, we don't consider it any more */ -- bdrv_parent_drained_begin(bs, parent, false); -+ bdrv_parent_drained_begin(bs, parent); - if (bs->drv && bs->drv->bdrv_drain_begin) { - bs->drv->bdrv_drain_begin(bs); - } -@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents, bool poll) -+ bool poll) - { - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); -+ bdrv_co_yield_to_drain(bs, true, parent, poll); - return; - } - -- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); -+ bdrv_do_drained_begin_quiesce(bs, parent); - - /* - * Wait for drained requests to finish. -@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - * nodes. - */ - if (poll) { -- assert(!ignore_bds_parents); - BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); - } - } -@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, NULL, false, true); -+ bdrv_do_drained_begin(bs, NULL, true); - } - - /** - * This function does not poll, nor must any of its recursively called - * functions. 
- */ --static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents) -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) - { - int old_quiesce_counter; - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); -+ bdrv_co_yield_to_drain(bs, false, parent, false); - return; - } - assert(bs->quiesce_counter > 0); -@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - if (bs->drv && bs->drv->bdrv_drain_end) { - bs->drv->bdrv_drain_end(bs); - } -- /* TODO Remove ignore_bds_parents, we don't consider it any more */ -- bdrv_parent_drained_end(bs, parent, false); -- -+ bdrv_parent_drained_end(bs, parent); - aio_enable_external(bdrv_get_aio_context(bs)); - } - } -@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - void bdrv_drained_end(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, NULL, false); -+ bdrv_do_drained_end(bs, NULL); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, NULL, true, true); -+ bdrv_co_yield_to_drain(NULL, true, NULL, true); - return; - } - -@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_begin(bs, NULL, true, false); -+ bdrv_do_drained_begin(bs, NULL, false); - aio_context_release(aio_context); - } - -@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- bdrv_do_drained_end(bs, NULL, true); -+ bdrv_do_drained_end(bs, NULL); - } - } - -@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, NULL, 
true); -+ bdrv_do_drained_end(bs, NULL); - aio_context_release(aio_context); - } - -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 9c36a16a1f..8f5e75756a 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); - * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already - * running requests to complete. - */ --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, -- BdrvChild *parent, bool ignore_bds_parents); -+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); - - /** - * bdrv_drained_end: --- -2.31.1 - diff --git a/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch b/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch deleted file mode 100644 index 94eba86..0000000 --- a/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:10 +0100 -Subject: [PATCH 28/31] block: Remove poll parameter from - bdrv_parent_drained_begin_single() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s) - -All callers of bdrv_parent_drained_begin_single() pass poll=false now, -so we don't need the parameter any more. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-16-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 ++-- - block/io.c | 8 ++------ - include/block/block-io.h | 5 ++--- - 3 files changed, 6 insertions(+), 11 deletions(-) - -diff --git a/block.c b/block.c -index 65588d313a..0d78711416 100644 ---- a/block.c -+++ b/block.c -@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque) - * new_bs drained when calling bdrv_replace_child_tran() is not a - * requirement any more. - */ -- bdrv_parent_drained_begin_single(s->child, false); -+ bdrv_parent_drained_begin_single(s->child); - assert(!bdrv_parent_drained_poll_single(s->child)); - } - assert(s->child->quiesced_parent); -@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, - * a problem, we already did this), but it will still poll until the parent - * is fully quiesced, so it will not be negatively affected either. 
- */ -- bdrv_parent_drained_begin_single(new_child, false); -+ bdrv_parent_drained_begin_single(new_child); - bdrv_replace_child_noperm(new_child, child_bs); - - BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); -diff --git a/block/io.c b/block/io.c -index ae64830eac..38e57d1f67 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) - if (c == ignore) { - continue; - } -- bdrv_parent_drained_begin_single(c, false); -+ bdrv_parent_drained_begin_single(c); - } - } - -@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, - return busy; - } - --void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) -+void bdrv_parent_drained_begin_single(BdrvChild *c) - { -- AioContext *ctx = bdrv_child_get_parent_aio_context(c); - IO_OR_GS_CODE(); - - assert(!c->quiesced_parent); -@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) - if (c->klass->drained_begin) { - c->klass->drained_begin(c); - } -- if (poll) { -- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); -- } - } - - static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 65e6d2569b..92aaa7c1e9 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); - /** - * bdrv_parent_drained_begin_single: - * -- * Begin a quiesced section for the parent of @c. If @poll is true, wait for -- * any pending activity to cease. -+ * Begin a quiesced section for the parent of @c. 
- */ --void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); -+void bdrv_parent_drained_begin_single(BdrvChild *c); - - /** - * bdrv_parent_drained_poll_single: --- -2.31.1 - diff --git a/SOURCES/kvm-block-Remove-subtree-drains.patch b/SOURCES/kvm-block-Remove-subtree-drains.patch deleted file mode 100644 index af9c0ff..0000000 --- a/SOURCES/kvm-block-Remove-subtree-drains.patch +++ /dev/null @@ -1,896 +0,0 @@ -From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:05 +0100 -Subject: [PATCH 23/31] block: Remove subtree drains - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s) - -Subtree drains are not used any more. Remove them. - -After this, BdrvChildClass.attach/detach() don't poll any more. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-11-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066) -Signed-off-by: Stefano Garzarella ---- - block.c | 20 +-- - block/io.c | 121 +++----------- - include/block/block-io.h | 18 +-- - include/block/block_int-common.h | 1 - - include/block/block_int-io.h | 12 -- - tests/unit/test-bdrv-drain.c | 261 ++----------------------------- - 6 files changed, 44 insertions(+), 389 deletions(-) - -diff --git a/block.c b/block.c -index 5330e89903..e0e3b21790 100644 ---- a/block.c -+++ b/block.c -@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) - static bool bdrv_child_cb_drained_poll(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- return bdrv_drain_poll(bs, false, NULL, false); -+ return bdrv_drain_poll(bs, NULL, false); - } - - static void bdrv_child_cb_drained_end(BdrvChild *child) -@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) - assert(!bs->file); - bs->file = child; - } -- -- bdrv_apply_subtree_drain(child, bs); - } - - static void bdrv_child_cb_detach(BdrvChild *child) -@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child) - bdrv_backing_detach(child); - } - -- bdrv_unapply_subtree_drain(child, bs); -- - assert_bdrv_graph_writable(bs); - QLIST_REMOVE(child, next); - if (child == bs->backing) { -@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - } - - if (old_bs) { -- /* Detach first so that the recursive drain sections coming from @child -- * are already gone and we only end the drain sections that came from -- * elsewhere. 
*/ - if (child->klass->detach) { - child->klass->detach(child); - } -@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); - - /* -- * Detaching the old node may have led to the new node's -- * quiesce_counter having been decreased. Not a problem, we -- * just need to recognize this here and then invoke -- * drained_end appropriately more often. -+ * Polling in bdrv_parent_drained_begin_single() may have led to the new -+ * node's quiesce_counter having been decreased. Not a problem, we just -+ * need to recognize this here and then invoke drained_end appropriately -+ * more often. - */ - assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); - drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; - -- /* Attach only after starting new drained sections, so that recursive -- * drain sections coming from @child don't get an extra .drained_begin -- * callback. */ - if (child->klass->attach) { - child->klass->attach(child); - } -diff --git a/block/io.c b/block/io.c -index a25103be6f..75224480d0 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -236,17 +236,15 @@ typedef struct { - BlockDriverState *bs; - bool done; - bool begin; -- bool recursive; - bool poll; - BdrvChild *parent; - bool ignore_bds_parents; - } BdrvCoDrainData; - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ --bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, -- BdrvChild *ignore_parent, bool ignore_bds_parents) -+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, -+ bool ignore_bds_parents) - { -- BdrvChild *child, *next; - IO_OR_GS_CODE(); - - if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { -@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - return true; - } - -- if (recursive) { -- assert(!ignore_bds_parents); -- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { -- if 
(bdrv_drain_poll(child->bs, recursive, child, false)) { -- return true; -- } -- } -- } -- - return false; - } - --static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, -+static bool bdrv_drain_poll_top_level(BlockDriverState *bs, - BdrvChild *ignore_parent) - { -- return bdrv_drain_poll(bs, recursive, ignore_parent, false); -+ return bdrv_drain_poll(bs, ignore_parent, false); - } - --static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- bool poll); --static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents); -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents, bool poll); -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- bdrv_do_drained_begin(bs, data->recursive, data->parent, -- data->ignore_bds_parents, data->poll); -+ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, -+ data->poll); - } else { - assert(!data->poll); -- bdrv_do_drained_end(bs, data->recursive, data->parent, -- data->ignore_bds_parents); -+ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); - } - aio_context_release(ctx); - } else { -@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) - } - - static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, -- bool begin, bool recursive, -+ bool begin, - BdrvChild *parent, - bool ignore_bds_parents, - bool poll) -@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .bs = bs, - .done = false, - .begin = begin, -- .recursive = recursive, - .parent = parent, - .ignore_bds_parents = ignore_bds_parents, - .poll = poll, 
-@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - } - --static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- bool poll) -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents, bool poll) - { -- BdrvChild *child, *next; -- - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, -- poll); -+ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); - return; - } - - bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); - -- if (recursive) { -- assert(!ignore_bds_parents); -- bs->recursive_quiesce_counter++; -- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { -- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, -- false); -- } -- } -- - /* - * Wait for drained requests to finish. - * -@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - */ - if (poll) { - assert(!ignore_bds_parents); -- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); -+ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); - } - } - - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, false, NULL, false, true); --} -- --void bdrv_subtree_drained_begin(BlockDriverState *bs) --{ -- IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, true, NULL, false, true); -+ bdrv_do_drained_begin(bs, NULL, false, true); - } - - /** - * This function does not poll, nor must any of its recursively called - * functions. 
- */ --static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents) -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents) - { -- BdrvChild *child; - int old_quiesce_counter; - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, -- false); -+ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); - return; - } - assert(bs->quiesce_counter > 0); -@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - if (old_quiesce_counter == 1) { - aio_enable_external(bdrv_get_aio_context(bs)); - } -- -- if (recursive) { -- assert(!ignore_bds_parents); -- bs->recursive_quiesce_counter--; -- QLIST_FOREACH(child, &bs->children, next) { -- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); -- } -- } - } - - void bdrv_drained_end(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false); --} -- --void bdrv_subtree_drained_end(BlockDriverState *bs) --{ -- IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, true, NULL, false); --} -- --void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) --{ -- int i; -- IO_OR_GS_CODE(); -- -- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_begin(child->bs, true, child, false, true); -- } --} -- --void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) --{ -- int i; -- IO_OR_GS_CODE(); -- -- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_end(child->bs, true, child, false); -- } -+ bdrv_do_drained_end(bs, NULL, false); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void) - while ((bs = bdrv_next_all_states(bs))) { - AioContext *aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); -- result |= bdrv_drain_poll(bs, false, 
NULL, true); -+ result |= bdrv_drain_poll(bs, NULL, true); - aio_context_release(aio_context); - } - -@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); -+ bdrv_co_yield_to_drain(NULL, true, NULL, true, true); - return; - } - -@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_begin(bs, false, NULL, true, false); -+ bdrv_do_drained_begin(bs, NULL, true, false); - aio_context_release(aio_context); - } - -@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- bdrv_do_drained_end(bs, false, NULL, true); -+ bdrv_do_drained_end(bs, NULL, true); - } - } - -@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, false, NULL, true); -+ bdrv_do_drained_end(bs, NULL, true); - aio_context_release(aio_context); - } - -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 054e964c9b..9c36a16a1f 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); - /** - * bdrv_drain_poll: - * -- * Poll for pending requests in @bs, its parents (except for @ignore_parent), -- * and if @recursive is true its children as well (used for subtree drain). -+ * Poll for pending requests in @bs and its parents (except for @ignore_parent). - * - * If @ignore_bds_parents is true, parents that are BlockDriverStates must - * ignore the drain request because they will be drained separately (used for -@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); - * - * This is part of bdrv_drained_begin. 
- */ --bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, -- BdrvChild *ignore_parent, bool ignore_bds_parents); -+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, -+ bool ignore_bds_parents); - - /** - * bdrv_drained_begin: -@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); - void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents); - --/** -- * Like bdrv_drained_begin, but recursively begins a quiesced section for -- * exclusive access to all child nodes as well. -- */ --void bdrv_subtree_drained_begin(BlockDriverState *bs); -- - /** - * bdrv_drained_end: - * -@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); - */ - void bdrv_drained_end(BlockDriverState *bs); - --/** -- * End a quiescent section started by bdrv_subtree_drained_begin(). -- */ --void bdrv_subtree_drained_end(BlockDriverState *bs); -- - #endif /* BLOCK_IO_H */ -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 2b97576f6d..791dddfd7d 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -1184,7 +1184,6 @@ struct BlockDriverState { - - /* Accessed with atomic ops. */ - int quiesce_counter; -- int recursive_quiesce_counter; - - unsigned int write_gen; /* Current data generation */ - -diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h -index 4b0b3e17ef..8bc061ebb8 100644 ---- a/include/block/block_int-io.h -+++ b/include/block/block_int-io.h -@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, - */ - void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); - -- --/* -- * "I/O or GS" API functions. These functions can run without -- * the BQL, but only in one specific iothread/main loop. -- * -- * See include/block/block-io.h for more information about -- * the "I/O or GS" API. 
-- */ -- --void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); --void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); -- - #endif /* BLOCK_INT_IO_H */ -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 695519ee02..dda08de8db 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void)) - enum drain_type { - BDRV_DRAIN_ALL, - BDRV_DRAIN, -- BDRV_SUBTREE_DRAIN, - DRAIN_TYPE_MAX, - }; - -@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) - switch (drain_type) { - case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; - case BDRV_DRAIN: bdrv_drained_begin(bs); break; -- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; - default: g_assert_not_reached(); - } - } -@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) - switch (drain_type) { - case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; - case BDRV_DRAIN: bdrv_drained_end(bs); break; -- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; - default: g_assert_not_reached(); - } - } -@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void) - test_drv_cb_common(BDRV_DRAIN, false); - } - --static void test_drv_cb_drain_subtree(void) --{ -- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); --} -- - static void test_drv_cb_co_drain_all(void) - { - call_in_coroutine(test_drv_cb_drain_all); -@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void) - call_in_coroutine(test_drv_cb_drain); - } - --static void test_drv_cb_co_drain_subtree(void) --{ -- call_in_coroutine(test_drv_cb_drain_subtree); --} -- - static void test_quiesce_common(enum drain_type drain_type, bool recursive) - { - BlockBackend *blk; -@@ -332,11 +319,6 @@ static void test_quiesce_drain(void) - test_quiesce_common(BDRV_DRAIN, false); - } - --static void 
test_quiesce_drain_subtree(void) --{ -- test_quiesce_common(BDRV_SUBTREE_DRAIN, true); --} -- - static void test_quiesce_co_drain_all(void) - { - call_in_coroutine(test_quiesce_drain_all); -@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void) - call_in_coroutine(test_quiesce_drain); - } - --static void test_quiesce_co_drain_subtree(void) --{ -- call_in_coroutine(test_quiesce_drain_subtree); --} -- - static void test_nested(void) - { - BlockBackend *blk; -@@ -402,158 +379,6 @@ static void test_nested(void) - blk_unref(blk); - } - --static void test_multiparent(void) --{ -- BlockBackend *blk_a, *blk_b; -- BlockDriverState *bs_a, *bs_b, *backing; -- BDRVTestState *a_s, *b_s, *backing_s; -- -- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, -- &error_abort); -- a_s = bs_a->opaque; -- blk_insert_bs(blk_a, bs_a, &error_abort); -- -- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, -- &error_abort); -- b_s = bs_b->opaque; -- blk_insert_bs(blk_b, bs_b, &error_abort); -- -- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); -- backing_s = backing->opaque; -- bdrv_set_backing_hd(bs_a, backing, &error_abort); -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); -- g_assert_cmpint(backing->quiesce_counter, ==, 1); -- g_assert_cmpint(a_s->drain_count, ==, 1); -- g_assert_cmpint(b_s->drain_count, ==, 1); -- 
g_assert_cmpint(backing_s->drain_count, ==, 1); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 2); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); -- g_assert_cmpint(backing->quiesce_counter, ==, 2); -- g_assert_cmpint(a_s->drain_count, ==, 2); -- g_assert_cmpint(b_s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, 2); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); -- g_assert_cmpint(backing->quiesce_counter, ==, 1); -- g_assert_cmpint(a_s->drain_count, ==, 1); -- g_assert_cmpint(b_s->drain_count, ==, 1); -- g_assert_cmpint(backing_s->drain_count, ==, 1); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- bdrv_unref(backing); -- bdrv_unref(bs_a); -- bdrv_unref(bs_b); -- blk_unref(blk_a); -- blk_unref(blk_b); --} -- --static void test_graph_change_drain_subtree(void) --{ -- BlockBackend *blk_a, *blk_b; -- BlockDriverState *bs_a, *bs_b, *backing; -- BDRVTestState *a_s, *b_s, *backing_s; -- -- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, -- &error_abort); -- a_s = bs_a->opaque; -- blk_insert_bs(blk_a, bs_a, &error_abort); -- -- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, -- &error_abort); -- b_s = bs_b->opaque; -- blk_insert_bs(blk_b, bs_b, &error_abort); -- -- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); -- backing_s = backing->opaque; -- bdrv_set_backing_hd(bs_a, backing, &error_abort); -- 
-- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); -- g_assert_cmpint(backing->quiesce_counter, ==, 5); -- g_assert_cmpint(a_s->drain_count, ==, 5); -- g_assert_cmpint(b_s->drain_count, ==, 5); -- g_assert_cmpint(backing_s->drain_count, ==, 5); -- -- bdrv_set_backing_hd(bs_b, NULL, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 3); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); -- g_assert_cmpint(backing->quiesce_counter, ==, 3); -- g_assert_cmpint(a_s->drain_count, ==, 3); -- g_assert_cmpint(b_s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, 3); -- -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); -- g_assert_cmpint(backing->quiesce_counter, ==, 5); -- g_assert_cmpint(a_s->drain_count, ==, 5); -- g_assert_cmpint(b_s->drain_count, ==, 5); -- g_assert_cmpint(backing_s->drain_count, ==, 5); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- 
g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- bdrv_unref(backing); -- bdrv_unref(bs_a); -- bdrv_unref(bs_b); -- blk_unref(blk_a); -- blk_unref(blk_b); --} -- - static void test_graph_change_drain_all(void) - { - BlockBackend *blk_a, *blk_b; -@@ -773,12 +598,6 @@ static void test_iothread_drain(void) - test_iothread_common(BDRV_DRAIN, 1); - } - --static void test_iothread_drain_subtree(void) --{ -- test_iothread_common(BDRV_SUBTREE_DRAIN, 0); -- test_iothread_common(BDRV_SUBTREE_DRAIN, 1); --} -- - - typedef struct TestBlockJob { - BlockJob common; -@@ -863,7 +682,6 @@ enum test_job_result { - enum test_job_drain_node { - TEST_JOB_DRAIN_SRC, - TEST_JOB_DRAIN_SRC_CHILD, -- TEST_JOB_DRAIN_SRC_PARENT, - }; - - static void test_blockjob_common_drain_node(enum drain_type drain_type, -@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - case TEST_JOB_DRAIN_SRC_CHILD: - drain_bs = src_backing; - break; -- case TEST_JOB_DRAIN_SRC_PARENT: -- drain_bs = src_overlay; -- break; - default: - g_assert_not_reached(); - } -@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - TEST_JOB_DRAIN_SRC); - test_blockjob_common_drain_node(drain_type, use_iothread, result, - TEST_JOB_DRAIN_SRC_CHILD); -- if (drain_type == BDRV_SUBTREE_DRAIN) { -- test_blockjob_common_drain_node(drain_type, use_iothread, result, -- TEST_JOB_DRAIN_SRC_PARENT); -- } - } - - static void test_blockjob_drain_all(void) -@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void) - test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); - } - --static void test_blockjob_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); --} -- - static void test_blockjob_error_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); -@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void) - 
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); - } - --static void test_blockjob_error_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); --} -- - static void test_blockjob_iothread_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); -@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void) - test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); - } - --static void test_blockjob_iothread_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); --} -- - static void test_blockjob_iothread_error_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); -@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void) - test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); - } - --static void test_blockjob_iothread_error_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); --} -- - - typedef struct BDRVTestTopState { - BdrvChild *wait_child; -@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - bdrv_drain(child_bs); - bdrv_unref(child_bs); - break; -- case BDRV_SUBTREE_DRAIN: -- /* Would have to ref/unref bs here for !detach_instead_of_delete, but -- * then the whole test becomes pointless because the graph changes -- * don't occur during the drain any more. 
*/ -- assert(detach_instead_of_delete); -- bdrv_subtree_drained_begin(bs); -- bdrv_subtree_drained_end(bs); -- break; - case BDRV_DRAIN_ALL: - bdrv_drain_all_begin(); - bdrv_drain_all_end(); -@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void) - do_test_delete_by_drain(true, BDRV_DRAIN); - } - --static void test_detach_by_drain_subtree(void) --{ -- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); --} -- - - struct detach_by_parent_data { - BlockDriverState *parent_b; -@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb) - g_assert(acb != NULL); - - /* Drain and check the expected result */ -- bdrv_subtree_drained_begin(parent_b); -+ bdrv_drained_begin(parent_b); -+ bdrv_drained_begin(a); -+ bdrv_drained_begin(b); -+ bdrv_drained_begin(c); - - g_assert(detach_by_parent_data.child_c != NULL); - -@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb) - g_assert(QLIST_NEXT(child_a, next) == NULL); - - g_assert_cmpint(parent_a->quiesce_counter, ==, 1); -- g_assert_cmpint(parent_b->quiesce_counter, ==, 1); -+ g_assert_cmpint(parent_b->quiesce_counter, ==, 3); - g_assert_cmpint(a->quiesce_counter, ==, 1); -- g_assert_cmpint(b->quiesce_counter, ==, 0); -+ g_assert_cmpint(b->quiesce_counter, ==, 1); - g_assert_cmpint(c->quiesce_counter, ==, 1); - -- bdrv_subtree_drained_end(parent_b); -+ bdrv_drained_end(parent_b); -+ bdrv_drained_end(a); -+ bdrv_drained_end(b); -+ bdrv_drained_end(c); - - bdrv_unref(parent_b); - blk_unref(blk); -@@ -2202,70 +1984,47 @@ int main(int argc, char **argv) - - g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); - g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); -- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", -- test_drv_cb_drain_subtree); - - g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", - test_drv_cb_co_drain_all); - g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); -- 
g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", -- test_drv_cb_co_drain_subtree); -- - - g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); - g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); -- g_test_add_func("/bdrv-drain/quiesce/drain_subtree", -- test_quiesce_drain_subtree); - - g_test_add_func("/bdrv-drain/quiesce/co/drain_all", - test_quiesce_co_drain_all); - g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); -- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", -- test_quiesce_co_drain_subtree); - - g_test_add_func("/bdrv-drain/nested", test_nested); -- g_test_add_func("/bdrv-drain/multiparent", test_multiparent); - -- g_test_add_func("/bdrv-drain/graph-change/drain_subtree", -- test_graph_change_drain_subtree); - g_test_add_func("/bdrv-drain/graph-change/drain_all", - test_graph_change_drain_all); - - g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); - g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); -- g_test_add_func("/bdrv-drain/iothread/drain_subtree", -- test_iothread_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); - g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); -- g_test_add_func("/bdrv-drain/blockjob/drain_subtree", -- test_blockjob_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/error/drain_all", - test_blockjob_error_drain_all); - g_test_add_func("/bdrv-drain/blockjob/error/drain", - test_blockjob_error_drain); -- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", -- test_blockjob_error_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", - test_blockjob_iothread_drain_all); - g_test_add_func("/bdrv-drain/blockjob/iothread/drain", - test_blockjob_iothread_drain); -- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", -- test_blockjob_iothread_drain_subtree); - - 
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", - test_blockjob_iothread_error_drain_all); - g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", - test_blockjob_iothread_error_drain); -- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", -- test_blockjob_iothread_error_drain_subtree); - - g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); - g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); - g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); -- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); - g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); - g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); - --- -2.31.1 - diff --git a/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch b/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch deleted file mode 100644 index 1529fdb..0000000 --- a/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch +++ /dev/null @@ -1,302 +0,0 @@ -From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:58 +0100 -Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to - non-coroutine_fn - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s) - -Polling during bdrv_drained_end() can be problematic (and in the future, -we may get cases for bdrv_drained_begin() where polling is forbidden, -and we don't care about already in-flight requests, but just want to -prevent new requests from arriving). 
- -The .bdrv_drained_begin/end callbacks running in a coroutine is the only -reason why we have to do this polling, so make them non-coroutine -callbacks again. None of the callers actually yield any more. - -This means that bdrv_drained_end() effectively doesn't poll any more, -even if AIO_WAIT_WHILE() loops are still there (their condition is false -from the beginning). This is generally not a problem, but in -test-bdrv-drain, some additional explicit aio_poll() calls need to be -added because the test case wants to verify the final state after BHs -have executed. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 +-- - block/io.c | 49 +++++--------------------------- - block/qed.c | 6 ++-- - block/throttle.c | 8 +++--- - include/block/block_int-common.h | 10 ++++--- - tests/unit/test-bdrv-drain.c | 18 ++++++------ - 6 files changed, 32 insertions(+), 63 deletions(-) - -diff --git a/block.c b/block.c -index ec184150a2..16a62a329c 100644 ---- a/block.c -+++ b/block.c -@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, - assert(is_power_of_2(bs->bl.request_alignment)); - - for (i = 0; i < bs->quiesce_counter; i++) { -- if (drv->bdrv_co_drain_begin) { -- drv->bdrv_co_drain_begin(bs); -+ if (drv->bdrv_drain_begin) { -+ drv->bdrv_drain_begin(bs); - } - } - -diff --git a/block/io.c b/block/io.c -index b9424024f9..c2ed4b2af9 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -252,55 +252,20 @@ typedef struct { - int *drained_end_counter; - } BdrvCoDrainData; - --static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) --{ -- BdrvCoDrainData *data = opaque; -- BlockDriverState *bs = data->bs; -- -- if (data->begin) { -- 
bs->drv->bdrv_co_drain_begin(bs); -- } else { -- bs->drv->bdrv_co_drain_end(bs); -- } -- -- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ -- qatomic_mb_set(&data->done, true); -- if (!data->begin) { -- qatomic_dec(data->drained_end_counter); -- } -- bdrv_dec_in_flight(bs); -- -- g_free(data); --} -- --/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ -+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ - static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, - int *drained_end_counter) - { -- BdrvCoDrainData *data; -- -- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || -- (!begin && !bs->drv->bdrv_co_drain_end)) { -+ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || -+ (!begin && !bs->drv->bdrv_drain_end)) { - return; - } - -- data = g_new(BdrvCoDrainData, 1); -- *data = (BdrvCoDrainData) { -- .bs = bs, -- .done = false, -- .begin = begin, -- .drained_end_counter = drained_end_counter, -- }; -- -- if (!begin) { -- qatomic_inc(drained_end_counter); -+ if (begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } else { -+ bs->drv->bdrv_drain_end(bs); - } -- -- /* Make sure the driver callback completes during the polling phase for -- * drain_begin. */ -- bdrv_inc_in_flight(bs); -- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); -- aio_co_schedule(bdrv_get_aio_context(bs), data->co); - } - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -diff --git a/block/qed.c b/block/qed.c -index 013f826c44..c2691a85b1 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) - assert(!s->allocating_write_reqs_plugged); - if (s->allocating_acb != NULL) { - /* Another allocating write came concurrently. This cannot happen -- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. -+ * from bdrv_qed_drain_begin, but it can happen when the timer runs. 
- */ - qemu_co_mutex_unlock(&s->table_lock); - return false; -@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, - } - } - --static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) -+static void bdrv_qed_drain_begin(BlockDriverState *bs) - { - BDRVQEDState *s = bs->opaque; - -@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = { - .bdrv_co_check = bdrv_qed_co_check, - .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, - .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, -- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, -+ .bdrv_drain_begin = bdrv_qed_drain_begin, - }; - - static void bdrv_qed_init(void) -diff --git a/block/throttle.c b/block/throttle.c -index 131eba3ab4..88851c84f4 100644 ---- a/block/throttle.c -+++ b/block/throttle.c -@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) - reopen_state->opaque = NULL; - } - --static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) -+static void throttle_drain_begin(BlockDriverState *bs) - { - ThrottleGroupMember *tgm = bs->opaque; - if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { -@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) - } - } - --static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) -+static void throttle_drain_end(BlockDriverState *bs) - { - ThrottleGroupMember *tgm = bs->opaque; - assert(tgm->io_limits_disabled); -@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = { - .bdrv_reopen_commit = throttle_reopen_commit, - .bdrv_reopen_abort = throttle_reopen_abort, - -- .bdrv_co_drain_begin = throttle_co_drain_begin, -- .bdrv_co_drain_end = throttle_co_drain_end, -+ .bdrv_drain_begin = throttle_drain_begin, -+ .bdrv_drain_end = throttle_drain_end, - - .is_filter = true, - .strong_runtime_opts = throttle_strong_runtime_opts, -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 31ae91e56e..40d646d1ed 
100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -735,17 +735,19 @@ struct BlockDriver { - void (*bdrv_io_unplug)(BlockDriverState *bs); - - /** -- * bdrv_co_drain_begin is called if implemented in the beginning of a -+ * bdrv_drain_begin is called if implemented in the beginning of a - * drain operation to drain and stop any internal sources of requests in - * the driver. -- * bdrv_co_drain_end is called if implemented at the end of the drain. -+ * bdrv_drain_end is called if implemented at the end of the drain. - * - * They should be used by the driver to e.g. manage scheduled I/O - * requests, or toggle an internal state. After the end of the drain new - * requests will continue normally. -+ * -+ * Implementations of both functions must not call aio_poll(). - */ -- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); -- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); -+ void (*bdrv_drain_begin)(BlockDriverState *bs); -+ void (*bdrv_drain_end)(BlockDriverState *bs); - - bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); - bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 24f34e24ad..695519ee02 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque) - bdrv_dec_in_flight(bs); - } - --static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) -+static void bdrv_test_drain_begin(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count++; -@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) - } - } - --static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) -+static void bdrv_test_drain_end(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count--; -@@ -111,8 +111,8 @@ static BlockDriver 
bdrv_test = { - .bdrv_close = bdrv_test_close, - .bdrv_co_preadv = bdrv_test_co_preadv, - -- .bdrv_co_drain_begin = bdrv_test_co_drain_begin, -- .bdrv_co_drain_end = bdrv_test_co_drain_end, -+ .bdrv_drain_begin = bdrv_test_drain_begin, -+ .bdrv_drain_end = bdrv_test_drain_end, - - .bdrv_child_perm = bdrv_default_perms, - -@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void) - bdrv_drained_begin(bs_child); - g_assert(!job_has_completed); - bdrv_drained_end(bs_child); -+ aio_poll(qemu_get_aio_context(), false); - g_assert(job_has_completed); - - bdrv_unref(bs_parents[0]); -@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void) - - g_assert(!job_has_completed); - ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); -+ aio_poll(qemu_get_aio_context(), false); - g_assert(ret == 0); - g_assert(job_has_completed); - -@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) - * .was_drained. - * Increment .drain_count. - */ --static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) -+static void bdrv_replace_test_drain_begin(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) - * If .drain_count reaches 0 and the node has a backing file, issue a - * read request. 
- */ --static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) -+static void bdrv_replace_test_drain_end(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = { - .bdrv_close = bdrv_replace_test_close, - .bdrv_co_preadv = bdrv_replace_test_co_preadv, - -- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, -- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, -+ .bdrv_drain_begin = bdrv_replace_test_drain_begin, -+ .bdrv_drain_end = bdrv_replace_test_drain_end, - - .bdrv_child_perm = bdrv_default_perms, - }; --- -2.31.1 - diff --git a/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch b/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch deleted file mode 100644 index 2d95689..0000000 --- a/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch +++ /dev/null @@ -1,246 +0,0 @@ -From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:56 +0200 -Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s) - -ImageInfo sometimes contains flat information, and sometimes it does -not. Split off a BlockNodeInfo struct, which only contains information -about a single node and has no link to the backing image. - -We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct, -which has links to all child nodes, not just the backing node. It would -be strange to base BlockGraphInfo on ImageInfo, because then this -extended struct would have two links to the backing node (one in -BlockGraphInfo as one of all the child links, and one in ImageInfo). 
- -Furthermore, it is quite common to ignore the backing-image field -altogether: bdrv_query_image_info() does not set it, and -bdrv_image_info_dump() does not evaluate it. That signals that we -should have different structs for describing a single node and one that -has a link to the backing image. - -Still, bdrv_query_image_info() and bdrv_image_info_dump() are not -changed too much in this patch. Follow-up patches will handle them. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-5-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------ - include/block/qapi.h | 3 ++ - qapi/block-core.json | 24 +++++++++---- - 3 files changed, 85 insertions(+), 28 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index 51202b470a..e5022b4481 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs, - } - - /** -- * bdrv_query_image_info: -- * @bs: block device to examine -- * @p_info: location to store image information -- * @errp: location to store error information -- * -- * Store "flat" image information in @p_info. -- * -- * "Flat" means it does *not* query backing image information, -- * i.e. (*pinfo)->has_backing_image will be set to false and -- * (*pinfo)->backing_image to NULL even when the image does in fact have -- * a backing image. -- * -- * @p_info will be set only on success. On error, store error in @errp. -+ * Helper function for other query info functions. Store information about @bs -+ * in @info, setting @errp on error. 
- */ --void bdrv_query_image_info(BlockDriverState *bs, -- ImageInfo **p_info, -- Error **errp) -+static void bdrv_do_query_node_info(BlockDriverState *bs, -+ BlockNodeInfo *info, -+ Error **errp) - { - int64_t size; - const char *backing_filename; - BlockDriverInfo bdi; - int ret; - Error *err = NULL; -- ImageInfo *info; - - aio_context_acquire(bdrv_get_aio_context(bs)); - -@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs, - - bdrv_refresh_filename(bs); - -- info = g_new0(ImageInfo, 1); - info->filename = g_strdup(bs->filename); - info->format = g_strdup(bdrv_get_format_name(bs)); - info->virtual_size = size; -@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs, - info->format_specific = bdrv_get_specific_info(bs, &err); - if (err) { - error_propagate(errp, err); -- qapi_free_ImageInfo(info); - goto out; - } - info->has_format_specific = info->format_specific != NULL; -@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs, - break; - default: - error_propagate(errp, err); -- qapi_free_ImageInfo(info); - goto out; - } - -- *p_info = info; -- - out: - aio_context_release(bdrv_get_aio_context(bs)); - } - -+/** -+ * bdrv_query_block_node_info: -+ * @bs: block node to examine -+ * @p_info: location to store node information -+ * @errp: location to store error information -+ * -+ * Store image information about @bs in @p_info. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. 
-+ */ -+void bdrv_query_block_node_info(BlockDriverState *bs, -+ BlockNodeInfo **p_info, -+ Error **errp) -+{ -+ BlockNodeInfo *info; -+ ERRP_GUARD(); -+ -+ info = g_new0(BlockNodeInfo, 1); -+ bdrv_do_query_node_info(bs, info, errp); -+ if (*errp) { -+ qapi_free_BlockNodeInfo(info); -+ return; -+ } -+ -+ *p_info = info; -+} -+ -+/** -+ * bdrv_query_image_info: -+ * @bs: block node to examine -+ * @p_info: location to store image information -+ * @errp: location to store error information -+ * -+ * Store "flat" image information in @p_info. -+ * -+ * "Flat" means it does *not* query backing image information, -+ * i.e. (*pinfo)->has_backing_image will be set to false and -+ * (*pinfo)->backing_image to NULL even when the image does in fact have -+ * a backing image. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. -+ */ -+void bdrv_query_image_info(BlockDriverState *bs, -+ ImageInfo **p_info, -+ Error **errp) -+{ -+ ImageInfo *info; -+ ERRP_GUARD(); -+ -+ info = g_new0(ImageInfo, 1); -+ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); -+ if (*errp) { -+ qapi_free_ImageInfo(info); -+ return; -+ } -+ -+ *p_info = info; -+} -+ - /* @p_info will be set only on success. 
*/ - static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, - Error **errp) -diff --git a/include/block/qapi.h b/include/block/qapi.h -index c09859ea78..c7de4e3fa9 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - int bdrv_query_snapshot_info_list(BlockDriverState *bs, - SnapshotInfoList **p_list, - Error **errp); -+void bdrv_query_block_node_info(BlockDriverState *bs, -+ BlockNodeInfo **p_info, -+ Error **errp); - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, - Error **errp); -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 4b9365167f..7720da0498 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -251,7 +251,7 @@ - } } - - ## --# @ImageInfo: -+# @BlockNodeInfo: - # - # Information about a QEMU image file - # -@@ -279,22 +279,34 @@ - # - # @snapshots: list of VM snapshots - # --# @backing-image: info of the backing image (since 1.6) --# - # @format-specific: structure supplying additional format-specific - # information (since 1.7) - # --# Since: 1.3 -+# Since: 8.0 - ## --{ 'struct': 'ImageInfo', -+{ 'struct': 'BlockNodeInfo', - 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', - '*actual-size': 'int', 'virtual-size': 'int', - '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool', - '*backing-filename': 'str', '*full-backing-filename': 'str', - '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], -- '*backing-image': 'ImageInfo', - '*format-specific': 'ImageInfoSpecific' } } - -+## -+# @ImageInfo: -+# -+# Information about a QEMU image file, and potentially its backing image -+# -+# @backing-image: info of the backing image -+# -+# Since: 1.3 -+## -+{ 'struct': 'ImageInfo', -+ 'base': 'BlockNodeInfo', -+ 'data': { -+ '*backing-image': 'ImageInfo' -+ } } -+ - ## - # @ImageCheck: - # --- -2.31.1 - diff --git 
a/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch b/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch new file mode 100644 index 0000000..0f0347b --- /dev/null +++ b/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch @@ -0,0 +1,386 @@ +From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 4 May 2023 13:57:33 +0200 +Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine + context + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm) + +These functions must not be called in coroutine context, because they +need write access to the graph. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-Id: <20230504115750.54437-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786) +Signed-off-by: Kevin Wolf +--- + block.c | 2 +- + block/crypto.c | 6 +++--- + block/parallels.c | 6 +++--- + block/qcow.c | 6 +++--- + block/qcow2.c | 14 +++++++------- + block/qed.c | 6 +++--- + block/vdi.c | 6 +++--- + block/vhdx.c | 6 +++--- + block/vmdk.c | 18 +++++++++--------- + block/vpc.c | 6 +++--- + include/block/block-global-state.h | 3 ++- + include/sysemu/block-backend-global-state.h | 5 ++++- + 12 files changed, 44 insertions(+), 40 deletions(-) + +diff --git a/block.c b/block.c +index d79a52ca74..a48112f945 100644 +--- a/block.c ++++ b/block.c +@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, + + ret = 0; + out: +- blk_unref(blk); ++ blk_co_unref(blk); + return ret; + } + +diff --git a/block/crypto.c b/block/crypto.c +index 
ca67289187..8fd3ad0054 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, + ret = 0; + cleanup: + qcrypto_block_free(crypto); +- blk_unref(blk); ++ blk_co_unref(blk); + return ret; + } + +@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) + + ret = 0; + fail: +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -730,7 +730,7 @@ fail: + bdrv_co_delete_file_noerr(bs); + } + +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_QCryptoBlockCreateOptions(create_opts); + qobject_unref(cryptoopts); + return ret; +diff --git a/block/parallels.c b/block/parallels.c +index 013684801a..b49c35929e 100644 +--- a/block/parallels.c ++++ b/block/parallels.c +@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, + + ret = 0; + out: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + + exit: +@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename, + + done: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qcow.c b/block/qcow.c +index 490e4f819e..a0c701f578 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, + g_free(tmp); + ret = 0; + exit: +- blk_unref(qcow_blk); +- bdrv_unref(bs); ++ blk_co_unref(qcow_blk); ++ bdrv_co_unref(bs); + qcrypto_block_free(crypto); + return ret; + } +@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename, + fail: + g_free(backing_fmt); + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qcow2.c b/block/qcow2.c +index 22084730f9..0b8beb8b47 100644 +--- a/block/qcow2.c ++++ 
b/block/qcow2.c +@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + goto out; + } + +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + + /* +@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + } + } + +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + + /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. +@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + + ret = 0; + out: +- blk_unref(blk); +- bdrv_unref(bs); +- bdrv_unref(data_bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); ++ bdrv_co_unref(data_bs); + return ret; + } + +@@ -3949,8 +3949,8 @@ finish: + } + + qobject_unref(qdict); +- bdrv_unref(bs); +- bdrv_unref(data_bs); ++ bdrv_co_unref(bs); ++ bdrv_co_unref(data_bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qed.c b/block/qed.c +index 0705a7b4e2..aff2a2076e 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, + ret = 0; /* success */ + out: + g_free(l1_table); +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/vdi.c b/block/vdi.c +index f2434d6153..08331d2dd7 100644 +--- a/block/vdi.c ++++ b/block/vdi.c +@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, + + ret = 0; + exit: +- blk_unref(blk); +- bdrv_unref(bs_file); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs_file); + g_free(bmap); + return ret; + } +@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename, + done: + qobject_unref(qdict); + 
qapi_free_BlockdevCreateOptions(create_options); +- bdrv_unref(bs_file); ++ bdrv_co_unref(bs_file); + return ret; + } + +diff --git a/block/vhdx.c b/block/vhdx.c +index 81420722a1..00777da91a 100644 +--- a/block/vhdx.c ++++ b/block/vhdx.c +@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, + + ret = 0; + delete_and_exit: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + g_free(creator); + return ret; + } +@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/vmdk.c b/block/vmdk.c +index f5f49018fe..01ca13c82b 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2306,7 +2306,7 @@ exit: + if (pbb) { + *pbb = blk; + } else { +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + } + } +@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size, + if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { + error_setg(errp, "Invalid backing file format: %s. 
Must be vmdk", + blk_bs(backing)->drv->format_name); +- blk_unref(backing); ++ blk_co_unref(backing); + ret = -EINVAL; + goto exit; + } + ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); +- blk_unref(backing); ++ blk_co_unref(backing); + if (ret) { + error_setg(errp, "Failed to read parent CID"); + goto exit; +@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size, + blk_bs(extent_blk)->filename); + created_size += cur_size; + extent_idx++; +- blk_unref(extent_blk); ++ blk_co_unref(extent_blk); + } + + /* Check whether we got excess extents */ + extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, + opaque, NULL); + if (extent_blk) { +- blk_unref(extent_blk); ++ blk_co_unref(extent_blk); + error_setg(errp, "List of extents contains unused extents"); + ret = -EINVAL; + goto exit; +@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size, + ret = 0; + exit: + if (blk) { +- blk_unref(blk); ++ blk_co_unref(blk); + } + g_free(desc); + g_free(parent_desc_line); +@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, + errp)) { + goto exit; + } +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + exit: + g_free(ext_filename); + return blk; +@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, + return NULL; + } + blk_set_allow_write_beyond_eof(blk, true); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + + if (size != -1) { + ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); + if (ret) { +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + } + } +diff --git a/block/vpc.c b/block/vpc.c +index b89b0ff8e2..07ddda5b99 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, + } + + out: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + 
qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index 399200a9a3..cd4ea554bf 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt, + bool quiet, Error **errp); + + void bdrv_ref(BlockDriverState *bs); +-void bdrv_unref(BlockDriverState *bs); ++void no_coroutine_fn bdrv_unref(BlockDriverState *bs); ++void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs); + void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); + BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, +diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h +index 2b6d27db7c..fa83f9389c 100644 +--- a/include/sysemu/block-backend-global-state.h ++++ b/include/sysemu/block-backend-global-state.h +@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options, + + int blk_get_refcnt(BlockBackend *blk); + void blk_ref(BlockBackend *blk); +-void blk_unref(BlockBackend *blk); ++ ++void no_coroutine_fn blk_unref(BlockBackend *blk); ++void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); ++ + void blk_remove_all_bs(void); + BlockBackend *blk_by_name(const char *name); + BlockBackend *blk_next(BlockBackend *blk); +-- +2.39.1 + diff --git a/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch b/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch new file mode 100644 index 0000000..caf6694 --- /dev/null +++ b/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch @@ -0,0 +1,74 @@ +From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 26 Jul 2023 09:48:07 +0200 +Subject: [PATCH 02/14] 
block/blkio: do not use open flags in qemu_open() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s) + +qemu_open() in blkio_virtio_blk_common_open() is used to open the +character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in +the future eventually the unix socket. + +In all these cases we cannot open the path in read-only mode, +when the `read-only` option of blockdev is on, because the exchange +of IOCTL commands for example will fail. + +In order to open the device read-only, we have to use the `read-only` +property of the libblkio driver as we already do in blkio_file_open(). + +Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439 +Reported-by: Qing Wang +Signed-off-by: Stefano Garzarella +Reviewed-by: Daniel P. Berrangé +Message-id: 20230726074807.14041-1-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 3ea9841bd8..5a82c6cb1a 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, + * layer through the "/dev/fdset/N" special path. + */ + if (fd_supported) { +- int open_flags; +- +- if (flags & BDRV_O_RDWR) { +- open_flags = O_RDWR; +- } else { +- open_flags = O_RDONLY; +- } +- +- fd = qemu_open(path, open_flags, errp); ++ /* ++ * `path` can contain the path of a character device ++ * (e.g. 
/dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket. ++ * ++ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR ++ * is not set in the open flags, because the exchange of IOCTL commands ++ * for example will fail. ++ * ++ * In order to open the device read-only, we are using the `read-only` ++ * property of the libblkio driver in blkio_file_open(). ++ */ ++ fd = qemu_open(path, O_RDWR, errp); + if (fd < 0) { + return -EINVAL; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch b/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch new file mode 100644 index 0000000..8a6f72b --- /dev/null +++ b/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch @@ -0,0 +1,54 @@ +From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 25 Jul 2023 12:37:44 +0200 +Subject: [PATCH 01/14] block/blkio: enable the completion eventfd + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s) + +Until libblkio 1.3.0, virtio-blk drivers had completion eventfd +notifications enabled from the start, but from the next releases +this is no longer the case, so we have to explicitly enable them. + +In fact, the libblkio documentation says they could be disabled, +so we should always enable them at the start if we want to be +sure to get completion eventfd notifications: + + By default, the driver might not generate completion events for + requests so it is necessary to explicitly enable the completion + file descriptor before use: + + void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable); + +I discovered this while trying a development version of libblkio: +the guest kernel hangs during boot, while probing the device. 
+ +Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") +Signed-off-by: Stefano Garzarella +Message-id: 20230725103744.77343-1-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/blkio.c b/block/blkio.c +index afcec359f2..3ea9841bd8 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + QLIST_INIT(&s->bounce_bufs); + s->blkioq = blkio_get_queue(s->blkio, 0); + s->completion_fd = blkioq_get_completion_fd(s->blkioq); ++ blkioq_set_completion_fd_enabled(s->blkioq, true); + + blkio_attach_aio_context(bs, bdrv_get_aio_context(bs)); + return 0; +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch b/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch new file mode 100644 index 0000000..f4d6e3c --- /dev/null +++ b/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch @@ -0,0 +1,67 @@ +From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 27 Jul 2023 18:10:19 +0200 +Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd` + setting fails + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s) + +qemu_open() fails if called with an unix domain socket in this way: + -blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address + +Since virtio-blk-vhost-user does not support fd passing, let`s always fall back +on using 
`path` if we fail the fd passing. + +Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") +Reported-by: Qing Wang +Signed-off-by: Stefano Garzarella +Message-id: 20230727161020.84213-4-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 93a8f8fc5c..eef80e9ce5 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, + * In order to open the device read-only, we are using the `read-only` + * property of the libblkio driver in blkio_file_open(). + */ +- fd = qemu_open(path, O_RDWR, errp); ++ fd = qemu_open(path, O_RDWR, NULL); + if (fd < 0) { +- return -EINVAL; ++ fd_supported = false; ++ } else { ++ ret = blkio_set_int(s->blkio, "fd", fd); ++ if (ret < 0) { ++ fd_supported = false; ++ qemu_close(fd); ++ } + } ++ } + +- ret = blkio_set_int(s->blkio, "fd", fd); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "failed to set fd: %s", +- blkio_get_error_msg()); +- qemu_close(fd); +- return ret; +- } +- } else { ++ if (!fd_supported) { + ret = blkio_set_str(s->blkio, "path", path); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set path: %s", +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch b/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch new file mode 100644 index 0000000..1c89a0b --- /dev/null +++ b/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch @@ -0,0 +1,205 @@ +From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 4 Jul 2023 14:34:36 +0200 +Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 181: block/blkio: fix 
module_block.py parsing +RH-Bugzilla: 2213317 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm) + +When QEMU is built with --enable-modules, the module_block.py script +parses block/*.c to find block drivers that are built as modules. The +script generates a table of block drivers called block_driver_modules[]. +This table is used for block driver module loading. + +The blkio.c driver uses macros to define its BlockDriver structs. This +was done to avoid code duplication but the module_block.py script is +unable to parse the macro. The result is that libblkio-based block +drivers can be built as modules but will not be found at runtime. + +One fix is to make the module_block.py script or build system fancier so +it can parse C macros (e.g. by parsing the preprocessed source code). I +chose not to do this because it raises the complexity of the build, +making future issues harder to debug. + +Keep things simple: use the macro to avoid duplicating BlockDriver +function pointers but define .format_name and .protocol_name manually +for each BlockDriver. This way the module_block.py is able to parse the +code. + +Also get rid of the block driver name macros (e.g. DRIVER_IO_URING) +because module_block.py cannot parse them either. + +Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") +Reported-by: Qing Wang +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20230704123436.187761-1-stefanha@redhat.com +Cc: Stefano Garzarella +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9) + +Conflicts: +- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to + blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback. 
+ +Signed-off-by: Stefan Hajnoczi +--- + block/blkio.c | 118 ++++++++++++++++++++++++++------------------------ + 1 file changed, 61 insertions(+), 57 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 6a6f20f923..afcec359f2 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -21,16 +21,6 @@ + + #include "block/block-io.h" + +-/* +- * Keep the QEMU BlockDriver names identical to the libblkio driver names. +- * Using macros instead of typing out the string literals avoids typos. +- */ +-#define DRIVER_IO_URING "io_uring" +-#define DRIVER_NVME_IO_URING "nvme-io_uring" +-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci" +-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user" +-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa" +- + /* + * Allocated bounce buffers are kept in a list sorted by buffer address. + */ +@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + return ret; + } + +- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) { ++ if (strcmp(blkio_driver, "io_uring") == 0) { + ret = blkio_io_uring_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) { ++ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { + ret = blkio_nvme_io_uring(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); + } else { + g_assert_not_reached(); +@@ -1027,50 +1017,64 @@ static 
void blkio_refresh_limits(BlockDriverState *bs, Error **errp) + * - truncate + */ + +-#define BLKIO_DRIVER(name, ...) \ +- { \ +- .format_name = name, \ +- .protocol_name = name, \ +- .instance_size = sizeof(BDRVBlkioState), \ +- .bdrv_file_open = blkio_file_open, \ +- .bdrv_close = blkio_close, \ +- .bdrv_co_getlength = blkio_co_getlength, \ +- .bdrv_co_truncate = blkio_truncate, \ +- .bdrv_co_get_info = blkio_co_get_info, \ +- .bdrv_attach_aio_context = blkio_attach_aio_context, \ +- .bdrv_detach_aio_context = blkio_detach_aio_context, \ +- .bdrv_co_pdiscard = blkio_co_pdiscard, \ +- .bdrv_co_preadv = blkio_co_preadv, \ +- .bdrv_co_pwritev = blkio_co_pwritev, \ +- .bdrv_co_flush_to_disk = blkio_co_flush, \ +- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ +- .bdrv_co_io_unplug = blkio_co_io_unplug, \ +- .bdrv_refresh_limits = blkio_refresh_limits, \ +- .bdrv_register_buf = blkio_register_buf, \ +- .bdrv_unregister_buf = blkio_unregister_buf, \ +- __VA_ARGS__ \ +- } +- +-static BlockDriver bdrv_io_uring = BLKIO_DRIVER( +- DRIVER_IO_URING, +- .bdrv_needs_filename = true, +-); +- +-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER( +- DRIVER_NVME_IO_URING, +-); +- +-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VFIO_PCI +-); ++/* ++ * Do not include .format_name and .protocol_name because module_block.py ++ * does not parse macros in the source code. 
++ */ ++#define BLKIO_DRIVER_COMMON \ ++ .instance_size = sizeof(BDRVBlkioState), \ ++ .bdrv_file_open = blkio_file_open, \ ++ .bdrv_close = blkio_close, \ ++ .bdrv_co_getlength = blkio_co_getlength, \ ++ .bdrv_co_truncate = blkio_truncate, \ ++ .bdrv_co_get_info = blkio_co_get_info, \ ++ .bdrv_attach_aio_context = blkio_attach_aio_context, \ ++ .bdrv_detach_aio_context = blkio_detach_aio_context, \ ++ .bdrv_co_pdiscard = blkio_co_pdiscard, \ ++ .bdrv_co_preadv = blkio_co_preadv, \ ++ .bdrv_co_pwritev = blkio_co_pwritev, \ ++ .bdrv_co_flush_to_disk = blkio_co_flush, \ ++ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ ++ .bdrv_co_io_unplug = blkio_co_io_unplug, \ ++ .bdrv_refresh_limits = blkio_refresh_limits, \ ++ .bdrv_register_buf = blkio_register_buf, \ ++ .bdrv_unregister_buf = blkio_unregister_buf, + +-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VHOST_USER +-); ++/* ++ * Use the same .format_name and .protocol_name as the libblkio driver name for ++ * consistency. 
++ */ + +-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VHOST_VDPA +-); ++static BlockDriver bdrv_io_uring = { ++ .format_name = "io_uring", ++ .protocol_name = "io_uring", ++ .bdrv_needs_filename = true, ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_nvme_io_uring = { ++ .format_name = "nvme-io_uring", ++ .protocol_name = "nvme-io_uring", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vfio_pci = { ++ .format_name = "virtio-blk-vfio-pci", ++ .protocol_name = "virtio-blk-vfio-pci", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vhost_user = { ++ .format_name = "virtio-blk-vhost-user", ++ .protocol_name = "virtio-blk-vhost-user", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vhost_vdpa = { ++ .format_name = "virtio-blk-vhost-vdpa", ++ .protocol_name = "virtio-blk-vhost-vdpa", ++ BLKIO_DRIVER_COMMON ++}; + + static void bdrv_blkio_init(void) + { +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch b/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch new file mode 100644 index 0000000..e3ec1ee --- /dev/null +++ b/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch @@ -0,0 +1,151 @@ +From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 27 Jul 2023 18:10:17 +0200 +Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers + functions + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s) + +This is in preparation for the next patch, where for virtio-blk +drivers we need to handle the failure of blkio_connect(). 
+ +Let's also rename the *_open() functions to *_connect() to make +the code reflect the changes applied. + +Signed-off-by: Stefano Garzarella +Message-id: 20230727161020.84213-2-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 67 ++++++++++++++++++++++++++++++--------------------- + 1 file changed, 40 insertions(+), 27 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 5a82c6cb1a..85d1eed5fb 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size) + } + } + +-static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, +- Error **errp) ++static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options, ++ int flags, Error **errp) + { + const char *filename = qdict_get_str(options, "filename"); + BDRVBlkioState *s = bs->opaque; +@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, + } + } + ++ ret = blkio_connect(s->blkio); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "blkio_connect failed: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ + return 0; + } + +-static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, +- Error **errp) ++static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options, ++ int flags, Error **errp) + { + const char *path = qdict_get_try_str(options, "path"); + BDRVBlkioState *s = bs->opaque; +@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, + return -EINVAL; + } + ++ ret = blkio_connect(s->blkio); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "blkio_connect failed: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ + return 0; + } + +-static int blkio_virtio_blk_common_open(BlockDriverState *bs, +- QDict *options, int flags, 
Error **errp) ++static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, ++ int flags, Error **errp) + { + const char *path = qdict_get_try_str(options, "path"); + BDRVBlkioState *s = bs->opaque; +@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, + } + } + ++ ret = blkio_connect(s->blkio); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "blkio_connect failed: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ + qdict_del(options, "path"); + + return 0; +@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + return ret; + } + +- if (strcmp(blkio_driver, "io_uring") == 0) { +- ret = blkio_io_uring_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { +- ret = blkio_nvme_io_uring(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { +- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { +- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { +- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else { +- g_assert_not_reached(); +- } +- if (ret < 0) { +- blkio_destroy(&s->blkio); +- return ret; +- } +- + if (!(flags & BDRV_O_RDWR)) { + ret = blkio_set_bool(s->blkio, "read-only", true); + if (ret < 0) { +@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + } + } + +- ret = blkio_connect(s->blkio); ++ if (strcmp(blkio_driver, "io_uring") == 0) { ++ ret = blkio_io_uring_connect(bs, options, flags, errp); ++ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { ++ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp); ++ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { ++ ret = blkio_virtio_blk_connect(bs, options, flags, errp); ++ } else if (strcmp(blkio_driver, 
"virtio-blk-vhost-user") == 0) { ++ ret = blkio_virtio_blk_connect(bs, options, flags, errp); ++ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { ++ ret = blkio_virtio_blk_connect(bs, options, flags, errp); ++ } else { ++ g_assert_not_reached(); ++ } + if (ret < 0) { +- error_setg_errno(errp, -ret, "blkio_connect failed: %s", +- blkio_get_error_msg()); + blkio_destroy(&s->blkio); + return ret; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch b/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch new file mode 100644 index 0000000..5ec9e0b --- /dev/null +++ b/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch @@ -0,0 +1,85 @@ +From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 27 Jul 2023 18:10:18 +0200 +Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using + `fd` + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s) + +libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa +driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use +qemu_open() to support fd passing for virtio-blk") we are using +`blkio_get_int(..., "fd")` to check if the "fd" property is supported +for all the virtio-blk-* driver. + +Unfortunately that property is also available for those driver that do +not support it, such as virtio-blk-vhost-user. + +So, `blkio_get_int()` is not enough to check whether the driver supports +the `fd` property or not. 
This is because the virtio-blk common libblkio +driver only checks whether or not `fd` is set during `blkio_connect()` +and fails with -EINVAL for those transports that do not support it +(all except vhost-vdpa for now). + +So let's handle the `blkio_connect()` failure, retrying it using `path` +directly. + +Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") +Suggested-by: Stefan Hajnoczi +Signed-off-by: Stefano Garzarella +Message-id: 20230727161020.84213-3-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/block/blkio.c b/block/blkio.c +index 85d1eed5fb..93a8f8fc5c 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, + } + + ret = blkio_connect(s->blkio); ++ /* ++ * If the libblkio driver doesn't support the `fd` property, blkio_connect() ++ * will fail with -EINVAL. So let's try calling blkio_connect() again by ++ * directly setting `path`. ++ */ ++ if (fd_supported && ret == -EINVAL) { ++ qemu_close(fd); ++ ++ /* ++ * We need to clear the `fd` property we set previously by setting ++ * it to -1. 
++ */ ++ ret = blkio_set_int(s->blkio, "fd", -1); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set fd: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ ++ ret = blkio_set_str(s->blkio, "path", path); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set path: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ ++ ret = blkio_connect(s->blkio); ++ } ++ + if (ret < 0) { + error_setg_errno(errp, -ret, "blkio_connect failed: %s", + blkio_get_error_msg()); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch b/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch new file mode 100644 index 0000000..c6e1cd8 --- /dev/null +++ b/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch @@ -0,0 +1,49 @@ +From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 27 Jul 2023 18:10:20 +0200 +Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd + support + +RH-Author: Stefano Garzarella +RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers +RH-Bugzilla: 2225354 2225439 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Alberto Faria +RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s) + +Setting the `fd` property fails with virtio-blk-* libblkio drivers +that do not support fd passing since +https://gitlab.com/libblkio/libblkio/-/merge_requests/208. + +Getting the `fd` property, on the other hand, always succeeds for +virtio-blk-* libblkio drivers even when they don't support fd passing. + +This patch switches to setting the `fd` property because it is a +better mechanism for probing fd passing support than getting the `fd` +property. 
+ +Signed-off-by: Stefano Garzarella +Message-id: 20230727161020.84213-5-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blkio.c b/block/blkio.c +index eef80e9ce5..8defbf744f 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, + return -EINVAL; + } + +- if (blkio_get_int(s->blkio, "fd", &fd) == 0) { ++ if (blkio_set_int(s->blkio, "fd", -1) == 0) { + fd_supported = true; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch b/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch new file mode 100644 index 0000000..3b32299 --- /dev/null +++ b/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch @@ -0,0 +1,108 @@ +From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 30 May 2023 09:19:40 +0200 +Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for + virtio-blk + +RH-Author: Stefano Garzarella +RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver +RH-Bugzilla: 2180076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s) + +Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) supports the fd +passing. Let's expose this to the user, so the management layer +can pass the file descriptor of an already opened path. + +If the libblkio virtio-blk driver supports fd passing, let's always +use qemu_open() to open the `path`, so we can handle fd passing +from the management layer through the "/dev/fdset/N" special path. 
+ +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Stefano Garzarella +Message-id: 20230530071941.8954-2-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 44 insertions(+), 9 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 0cdc99a729..6a6f20f923 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, + { + const char *path = qdict_get_try_str(options, "path"); + BDRVBlkioState *s = bs->opaque; +- int ret; ++ bool fd_supported = false; ++ int fd, ret; + + if (!path) { + error_setg(errp, "missing 'path' option"); + return -EINVAL; + } + +- ret = blkio_set_str(s->blkio, "path", path); +- qdict_del(options, "path"); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "failed to set path: %s", +- blkio_get_error_msg()); +- return ret; +- } +- + if (!(flags & BDRV_O_NOCACHE)) { + error_setg(errp, "cache.direct=off is not supported"); + return -EINVAL; + } ++ ++ if (blkio_get_int(s->blkio, "fd", &fd) == 0) { ++ fd_supported = true; ++ } ++ ++ /* ++ * If the libblkio driver supports fd passing, let's always use qemu_open() ++ * to open the `path`, so we can handle fd passing from the management ++ * layer through the "/dev/fdset/N" special path. 
++ */ ++ if (fd_supported) { ++ int open_flags; ++ ++ if (flags & BDRV_O_RDWR) { ++ open_flags = O_RDWR; ++ } else { ++ open_flags = O_RDONLY; ++ } ++ ++ fd = qemu_open(path, open_flags, errp); ++ if (fd < 0) { ++ return -EINVAL; ++ } ++ ++ ret = blkio_set_int(s->blkio, "fd", fd); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set fd: %s", ++ blkio_get_error_msg()); ++ qemu_close(fd); ++ return ret; ++ } ++ } else { ++ ret = blkio_set_str(s->blkio, "path", path); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set path: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ } ++ ++ qdict_del(options, "path"); ++ + return 0; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch b/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch new file mode 100644 index 0000000..b6eebf3 --- /dev/null +++ b/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch @@ -0,0 +1,121 @@ +From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 1 May 2023 13:34:43 -0400 +Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by + default + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm) + +reader_count() is a performance bottleneck because the global +aio_context_list_lock mutex causes thread contention. Put this debugging +assertion behind a new ./configure --enable-debug-graph-lock option and +disable it by default. + +The --enable-debug-graph-lock option is also enabled by the more general +--enable-debug option. 
+ +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230501173443.153062-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf) +Signed-off-by: Kevin Wolf +--- + block/graph-lock.c | 3 +++ + configure | 1 + + meson.build | 2 ++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 4 ++++ + 5 files changed, 12 insertions(+) + +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 454c31e691..259a7a0bde 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void) + + void assert_bdrv_graph_readable(void) + { ++ /* reader_count() is slow due to aio_context_list_lock lock contention */ ++#ifdef CONFIG_DEBUG_GRAPH_LOCK + assert(qemu_in_main_thread() || reader_count()); ++#endif + } + + void assert_bdrv_graph_writable(void) +diff --git a/configure b/configure +index 800b5850f4..a62a3e6be9 100755 +--- a/configure ++++ b/configure +@@ -806,6 +806,7 @@ for opt do + --enable-debug) + # Enable debugging options that aren't excessively noisy + debug_tcg="yes" ++ meson_option_parse --enable-debug-graph-lock "" + meson_option_parse --enable-debug-mutex "" + meson_option_add -Doptimization=0 + fortify_source="no" +diff --git a/meson.build b/meson.build +index c44d05a13f..d964e741e7 100644 +--- a/meson.build ++++ b/meson.build +@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool + have_coroutine_pool = false + endif + config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) ++config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) + config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) + config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) + config_host_data.set('CONFIG_GPROF', get_option('gprof')) +@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')} + summary_info += {'static build': 
config_host.has_key('CONFIG_STATIC')} + summary_info += {'malloc trim support': has_malloc_trim} + summary_info += {'membarrier': have_membarrier} ++summary_info += {'debug graph lock': get_option('debug_graph_lock')} + summary_info += {'debug stack usage': get_option('debug_stack_usage')} + summary_info += {'mutex debugging': get_option('debug_mutex')} + summary_info += {'memory allocator': get_option('malloc')} +diff --git a/meson_options.txt b/meson_options.txt +index fc9447d267..bc857fe68b 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false, + description: 'dummy RNG, avoid using /dev/(u)random and getrandom()') + option('coroutine_pool', type: 'boolean', value: true, + description: 'coroutine freelist (better performance)') ++option('debug_graph_lock', type: 'boolean', value: false, ++ description: 'graph lock debugging support') + option('debug_mutex', type: 'boolean', value: false, + description: 'mutex debugging support') + option('debug_stack_usage', type: 'boolean', value: false, +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 009fab1515..30e1f25259 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -21,6 +21,8 @@ meson_options_help() { + printf "%s\n" ' QEMU' + printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' + printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' ++ printf "%s\n" ' --enable-debug-graph-lock' ++ printf "%s\n" ' graph lock debugging support' + printf "%s\n" ' --enable-debug-mutex mutex debugging support' + printf "%s\n" ' --enable-debug-stack-usage' + printf "%s\n" ' measure coroutine stack usage' +@@ -249,6 +251,8 @@ _meson_option_parse() { + --datadir=*) quote_sh "-Ddatadir=$2" ;; + --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;; + --disable-dbus-display) printf "%s" -Ddbus_display=disabled ;; ++ --enable-debug-graph-lock) printf "%s" 
-Ddebug_graph_lock=true ;; ++ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;; + --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;; + --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;; + --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;; +-- +2.39.3 + diff --git a/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch b/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch deleted file mode 100644 index 19d52b5..0000000 --- a/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch +++ /dev/null @@ -1,70 +0,0 @@ -From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Mon, 7 Nov 2022 19:35:56 +0300 -Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s) - -Drop this simple wrapper used only in one place. We have too many graph -modifying functions even without it. 
- -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264) -Signed-off-by: Stefano Garzarella ---- - block.c | 15 +-------------- - 1 file changed, 1 insertion(+), 14 deletions(-) - -diff --git a/block.c b/block.c -index a18f052374..ec184150a2 100644 ---- a/block.c -+++ b/block.c -@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs); - static void bdrv_remove_child(BdrvChild *child, Transaction *tran); --static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, -- Transaction *tran); - - static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, -@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) - tran_add(tran, &bdrv_remove_child_drv, child); - } - --/* -- * A function to remove backing-chain child of @bs if exists: cow child for -- * format nodes (always .backing) and filter child for filters (may be .file or -- * .backing) -- */ --static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, -- Transaction *tran) --{ -- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); --} -- - static int bdrv_replace_node_noperm(BlockDriverState *from, - BlockDriverState *to, - bool auto_skip, Transaction *tran, -@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, - } - - if (detach_subchain) { -- bdrv_remove_filter_or_cow_child(to_cow_parent, tran); -+ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); - } - - found = g_hash_table_new(NULL, NULL); --- -2.31.1 - diff --git a/SOURCES/kvm-block-file-Add-file-specific-image-info.patch b/SOURCES/kvm-block-file-Add-file-specific-image-info.patch deleted file mode 100644 index a81b6b0..0000000 --- 
a/SOURCES/kvm-block-file-Add-file-specific-image-info.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:54 +0200 -Subject: [PATCH 07/20] block/file: Add file-specific image info - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s) - -Add some (optional) information that the file driver can provide for -image files, namely the extent size hint. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-3-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0) -Signed-off-by: Hanna Czenczek ---- - block/file-posix.c | 30 ++++++++++++++++++++++++++++++ - qapi/block-core.json | 26 ++++++++++++++++++++++++-- - 2 files changed, 54 insertions(+), 2 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index b9647c5ffc..df3da79aed 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) - return 0; - } - -+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs, -+ Error **errp) -+{ -+ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1); -+ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); -+ -+ *spec_info = (ImageInfoSpecific){ -+ .type = IMAGE_INFO_SPECIFIC_KIND_FILE, -+ .u.file.data = file_info, -+ }; -+ -+#ifdef FS_IOC_FSGETXATTR -+ { -+ BDRVRawState *s = bs->opaque; -+ struct fsxattr attr; -+ int ret; -+ -+ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr); -+ if (!ret && attr.fsx_extsize != 0) { -+ file_info->has_extent_size_hint = true; -+ file_info->extent_size_hint = attr.fsx_extsize; -+ } 
-+ } -+#endif -+ -+ return spec_info; -+} -+ - static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs) - { - BDRVRawState *s = bs->opaque; -@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = { - .bdrv_co_truncate = raw_co_truncate, - .bdrv_getlength = raw_getlength, - .bdrv_get_info = raw_get_info, -+ .bdrv_get_specific_info = raw_get_specific_info, - .bdrv_get_allocated_file_size - = raw_get_allocated_file_size, - .bdrv_get_specific_stats = raw_get_specific_stats, -@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = { - .bdrv_co_truncate = raw_co_truncate, - .bdrv_getlength = raw_getlength, - .bdrv_get_info = raw_get_info, -+ .bdrv_get_specific_info = raw_get_specific_info, - .bdrv_get_allocated_file_size - = raw_get_allocated_file_size, - .bdrv_get_specific_stats = hdev_get_specific_stats, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 95ac4fa634..f5d822cbd6 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -139,16 +139,29 @@ - '*encryption-format': 'RbdImageEncryptionFormat' - } } - -+## -+# @ImageInfoSpecificFile: -+# -+# @extent-size-hint: Extent size hint (if available) -+# -+# Since: 8.0 -+## -+{ 'struct': 'ImageInfoSpecificFile', -+ 'data': { -+ '*extent-size-hint': 'size' -+ } } -+ - ## - # @ImageInfoSpecificKind: - # - # @luks: Since 2.7 - # @rbd: Since 6.1 -+# @file: Since 8.0 - # - # Since: 1.7 - ## - { 'enum': 'ImageInfoSpecificKind', -- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] } -+ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] } - - ## - # @ImageInfoSpecificQCow2Wrapper: -@@ -185,6 +198,14 @@ - { 'struct': 'ImageInfoSpecificRbdWrapper', - 'data': { 'data': 'ImageInfoSpecificRbd' } } - -+## -+# @ImageInfoSpecificFileWrapper: -+# -+# Since: 8.0 -+## -+{ 'struct': 'ImageInfoSpecificFileWrapper', -+ 'data': { 'data': 'ImageInfoSpecificFile' } } -+ - ## - # @ImageInfoSpecific: - # -@@ -199,7 +220,8 @@ - 'qcow2': 'ImageInfoSpecificQCow2Wrapper', - 'vmdk': 'ImageInfoSpecificVmdkWrapper', - 
'luks': 'ImageInfoSpecificLUKSWrapper', -- 'rbd': 'ImageInfoSpecificRbdWrapper' -+ 'rbd': 'ImageInfoSpecificRbdWrapper', -+ 'file': 'ImageInfoSpecificFileWrapper' - } } - - ## --- -2.31.1 - diff --git a/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch b/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch deleted file mode 100644 index 62979ef..0000000 --- a/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch +++ /dev/null @@ -1,206 +0,0 @@ -From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:00 +0200 -Subject: [PATCH 13/20] block/qapi: Add indentation to bdrv_node_info_dump() - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s) - -In order to let qemu-img info present a block graph, add a parameter to -bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the -information of nodes below the root level can be given an indentation. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-9-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 47 +++++++++++++++++++--------------- - include/block/qapi.h | 5 ++-- - qemu-img.c | 2 +- - qemu-io-cmds.c | 3 ++- - 5 files changed, 34 insertions(+), 25 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index aa37faa601..72824d4e2e 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index f208c21ccf..3e35603f0c 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj) - * prepending an optional prefix if the dump is not empty. 
- */ - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -- const char *prefix) -+ const char *prefix, -+ int indentation) - { - QObject *obj, *data; - Visitor *v = qobject_output_visitor_new(&obj); -@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - data = qdict_get(qobject_to(QDict, obj), "data"); - if (!qobject_is_empty_dump(data)) { - if (prefix) { -- qemu_printf("%s", prefix); -+ qemu_printf("%*s%s", indentation * 4, "", prefix); - } -- dump_qobject(1, data); -+ dump_qobject(indentation + 1, data); - } - qobject_unref(obj); - visit_free(v); - } - --void bdrv_node_info_dump(BlockNodeInfo *info) -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) - { - char *size_buf, *dsize_buf; -+ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); -+ - if (!info->has_actual_size) { - dsize_buf = g_strdup("unavailable"); - } else { - dsize_buf = size_to_str(info->actual_size); - } - size_buf = size_to_str(info->virtual_size); -- qemu_printf("image: %s\n" -- "file format: %s\n" -- "virtual size: %s (%" PRId64 " bytes)\n" -- "disk size: %s\n", -- info->filename, info->format, size_buf, -- info->virtual_size, -- dsize_buf); -+ qemu_printf("%simage: %s\n" -+ "%sfile format: %s\n" -+ "%svirtual size: %s (%" PRId64 " bytes)\n" -+ "%sdisk size: %s\n", -+ ind_s, info->filename, -+ ind_s, info->format, -+ ind_s, size_buf, info->virtual_size, -+ ind_s, dsize_buf); - g_free(size_buf); - g_free(dsize_buf); - - if (info->has_encrypted && info->encrypted) { -- qemu_printf("encrypted: yes\n"); -+ qemu_printf("%sencrypted: yes\n", ind_s); - } - - if (info->has_cluster_size) { -- qemu_printf("cluster_size: %" PRId64 "\n", -- info->cluster_size); -+ qemu_printf("%scluster_size: %" PRId64 "\n", -+ ind_s, info->cluster_size); - } - - if (info->has_dirty_flag && info->dirty_flag) { -- qemu_printf("cleanly shut down: no\n"); -+ qemu_printf("%scleanly shut down: no\n", ind_s); - } - - if (info->has_backing_filename) 
{ -- qemu_printf("backing file: %s", info->backing_filename); -+ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename); - if (!info->has_full_backing_filename) { - qemu_printf(" (cannot determine actual path)"); - } else if (strcmp(info->backing_filename, -@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - } - qemu_printf("\n"); - if (info->has_backing_filename_format) { -- qemu_printf("backing file format: %s\n", -- info->backing_filename_format); -+ qemu_printf("%sbacking file format: %s\n", -+ ind_s, info->backing_filename_format); - } - } - - if (info->has_snapshots) { - SnapshotInfoList *elem; - -- qemu_printf("Snapshot list:\n"); -+ qemu_printf("%sSnapshot list:\n", ind_s); -+ qemu_printf("%s", ind_s); - bdrv_snapshot_dump(NULL); - qemu_printf("\n"); - -@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - - pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id); - pstrcpy(sn.name, sizeof(sn.name), elem->value->name); -+ qemu_printf("%s", ind_s); - bdrv_snapshot_dump(&sn); - qemu_printf("\n"); - } -@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - - if (info->has_format_specific) { - bdrv_image_info_specific_dump(info->format_specific, -- "Format specific information:\n"); -+ "Format specific information:\n", -+ indentation); - } - } -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 196436020e..38855f2ae9 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs, - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -- const char *prefix); --void bdrv_node_info_dump(BlockNodeInfo *info); -+ const char *prefix, -+ int indentation); -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); - #endif -diff --git a/qemu-img.c b/qemu-img.c -index 3b2ca3bbcb..30b4ea58bb 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2859,7 +2859,7 @@ 
static void dump_human_image_info_list(BlockNodeInfoList *list) - } - delim = true; - -- bdrv_node_info_dump(elem->value); -+ bdrv_node_info_dump(elem->value, 0); - } - } - -diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c -index f4a374528e..fdcb89211b 100644 ---- a/qemu-io-cmds.c -+++ b/qemu-io-cmds.c -@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) - } - if (spec_info) { - bdrv_image_info_specific_dump(spec_info, -- "Format specific information:\n"); -+ "Format specific information:\n", -+ 0); - qapi_free_ImageInfoSpecific(spec_info); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch b/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch deleted file mode 100644 index e9a1622..0000000 --- a/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch +++ /dev/null @@ -1,155 +0,0 @@ -From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:59 +0200 -Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s) - -Introduce a new QAPI type BlockGraphInfo and an associated -bdrv_query_block_graph_info() function that recursively gathers -BlockNodeInfo objects through a block graph. - -A follow-up patch is going to make "qemu-img info" use this to print -information about all nodes that are (usually implicitly) opened for a -given image file. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-8-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ - include/block/qapi.h | 3 +++ - qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++ - 3 files changed, 86 insertions(+) - -diff --git a/block/qapi.c b/block/qapi.c -index 5d0a8d2ce3..f208c21ccf 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -411,6 +411,54 @@ fail: - qapi_free_ImageInfo(info); - } - -+/** -+ * bdrv_query_block_graph_info: -+ * @bs: root node to start from -+ * @p_info: location to store image information -+ * @errp: location to store error information -+ * -+ * Store image information about the graph starting from @bs in @p_info. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. -+ */ -+void bdrv_query_block_graph_info(BlockDriverState *bs, -+ BlockGraphInfo **p_info, -+ Error **errp) -+{ -+ BlockGraphInfo *info; -+ BlockChildInfoList **children_list_tail; -+ BdrvChild *c; -+ ERRP_GUARD(); -+ -+ info = g_new0(BlockGraphInfo, 1); -+ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp); -+ if (*errp) { -+ goto fail; -+ } -+ -+ children_list_tail = &info->children; -+ -+ QLIST_FOREACH(c, &bs->children, next) { -+ BlockChildInfo *c_info; -+ -+ c_info = g_new0(BlockChildInfo, 1); -+ QAPI_LIST_APPEND(children_list_tail, c_info); -+ -+ c_info->name = g_strdup(c->name); -+ bdrv_query_block_graph_info(c->bs, &c_info->info, errp); -+ if (*errp) { -+ goto fail; -+ } -+ } -+ -+ *p_info = info; -+ return; -+ -+fail: -+ assert(*errp != NULL); -+ qapi_free_BlockGraphInfo(info); -+} -+ - /* @p_info will be set only on success. 
*/ - static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, - Error **errp) -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 2174bf8fa2..196436020e 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs, - bool flat, - bool skip_implicit_filters, - Error **errp); -+void bdrv_query_block_graph_info(BlockDriverState *bs, -+ BlockGraphInfo **p_info, -+ Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 4cf2deeb6c..d703e0fb16 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -307,6 +307,41 @@ - '*backing-image': 'ImageInfo' - } } - -+## -+# @BlockChildInfo: -+# -+# Information about all nodes in the block graph starting at some node, -+# annotated with information about that node in relation to its parent. -+# -+# @name: Child name of the root node in the BlockGraphInfo struct, in its role -+# as the child of some undescribed parent node -+# -+# @info: Block graph information starting at this node -+# -+# Since: 8.0 -+## -+{ 'struct': 'BlockChildInfo', -+ 'data': { -+ 'name': 'str', -+ 'info': 'BlockGraphInfo' -+ } } -+ -+## -+# @BlockGraphInfo: -+# -+# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo -+# data. -+# The base BlockNodeInfo struct contains the information for the (sub)graph's -+# root node. 
-+# -+# @children: Array of links to this node's child nodes' information -+# -+# Since: 8.0 -+## -+{ 'struct': 'BlockGraphInfo', -+ 'base': 'BlockNodeInfo', -+ 'data': { 'children': ['BlockChildInfo'] } } -+ - ## - # @ImageCheck: - # --- -2.31.1 - diff --git a/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch b/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch deleted file mode 100644 index e5c012a..0000000 --- a/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch +++ /dev/null @@ -1,197 +0,0 @@ -From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:58 +0200 -Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s) - -There is no real reason why bdrv_query_image_info() should generally not -recurse. The ImageInfo struct has a pointer to the backing image, so it -should generally be filled, unless the caller explicitly opts out. - -This moves the recursing code from bdrv_block_device_info() into -bdrv_query_image_info(). - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-7-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58) - -Conflicts: - block/qapi.c: Conflicts with - 54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide - redundant has_FOO in generated C"), which dropped - `has_backing_image`. Without that commit (and 44ea9d9be before it), - we still need to set `has_backing_image` in - `bdrv_query_image_info()`. 
- -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 94 +++++++++++++++++++++++++++----------------- - include/block/qapi.h | 2 + - 2 files changed, 59 insertions(+), 37 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index ad88bf9b38..5d0a8d2ce3 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - Error **errp) - { - ImageInfo **p_image_info; -+ ImageInfo *backing_info; - BlockDriverState *bs0, *backing; - BlockDeviceInfo *info; -+ ERRP_GUARD(); - - if (!bs->drv) { - error_setg(errp, "Block device %s is ejected", bs->node_name); -@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - bs0 = bs; - p_image_info = &info->image; - info->backing_file_depth = 0; -- while (1) { -- Error *local_err = NULL; -- bdrv_query_image_info(bs0, p_image_info, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- qapi_free_BlockDeviceInfo(info); -- return NULL; -- } -- -- /* stop gathering data for flat output */ -- if (flat) { -- break; -- } - -- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) { -- /* -- * Put any filtered child here (for backwards compatibility to when -- * we put bs0->backing here, which might be any filtered child). 
-- */ -- info->backing_file_depth++; -- bs0 = bdrv_filter_or_cow_bs(bs0); -- (*p_image_info)->has_backing_image = true; -- p_image_info = &((*p_image_info)->backing_image); -- } else { -- break; -- } -+ /* -+ * Skip automatically inserted nodes that the user isn't aware of for -+ * query-block (blk != NULL), but not for query-named-block-nodes -+ */ -+ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp); -+ if (*errp) { -+ qapi_free_BlockDeviceInfo(info); -+ return NULL; -+ } - -- /* Skip automatically inserted nodes that the user isn't aware of for -- * query-block (blk != NULL), but not for query-named-block-nodes */ -- if (blk) { -- bs0 = bdrv_skip_implicit_filters(bs0); -- } -+ backing_info = info->image->backing_image; -+ while (backing_info) { -+ info->backing_file_depth++; -+ backing_info = backing_info->backing_image; - } - - return info; -@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs, - * bdrv_query_image_info: - * @bs: block node to examine - * @p_info: location to store image information -+ * @flat: skip backing node information -+ * @skip_implicit_filters: skip implicit filters in the backing chain - * @errp: location to store error information - * -- * Store "flat" image information in @p_info. -+ * Store image information in @p_info, potentially recursively covering the -+ * backing chain. - * -- * "Flat" means it does *not* query backing image information, -- * i.e. (*pinfo)->has_backing_image will be set to false and -- * (*pinfo)->backing_image to NULL even when the image does in fact have -- * a backing image. -+ * If @flat is true, do not query backing image information, i.e. -+ * (*p_info)->has_backing_image will be set to false and -+ * (*p_info)->backing_image to NULL even when the image does in fact have a -+ * backing image. -+ * -+ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain -+ * will be skipped when querying backing image information. 
-+ * (@skip_implicit_filters is ignored when @flat is true.) - * - * @p_info will be set only on success. On error, store error in @errp. - */ - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, -+ bool flat, -+ bool skip_implicit_filters, - Error **errp) - { - ImageInfo *info; -@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs, - info = g_new0(ImageInfo, 1); - bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); - if (*errp) { -- qapi_free_ImageInfo(info); -- return; -+ goto fail; -+ } -+ -+ if (!flat) { -+ BlockDriverState *backing; -+ -+ /* -+ * Use any filtered child here (for backwards compatibility to when -+ * we always took bs->backing, which might be any filtered child). -+ */ -+ backing = bdrv_filter_or_cow_bs(bs); -+ if (skip_implicit_filters) { -+ backing = bdrv_skip_implicit_filters(backing); -+ } -+ -+ if (backing) { -+ bdrv_query_image_info(backing, &info->backing_image, false, -+ skip_implicit_filters, errp); -+ if (*errp) { -+ goto fail; -+ } -+ info->has_backing_image = true; -+ } - } - - *p_info = info; -+ return; -+ -+fail: -+ assert(*errp); -+ qapi_free_ImageInfo(info); - } - - /* @p_info will be set only on success. 
*/ -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 22198dcd0c..2174bf8fa2 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs, - Error **errp); - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, -+ bool flat, -+ bool skip_implicit_filters, - Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); --- -2.31.1 - diff --git a/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch b/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch deleted file mode 100644 index 8d5a20a..0000000 --- a/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch +++ /dev/null @@ -1,99 +0,0 @@ -From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 14 Feb 2023 18:16:21 +0100 -Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in - bdrv_append() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append() -RH-Bugzilla: 2168209 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s) - -bdrv_append() is called with bs_top AioContext held, but -bdrv_attach_child_noperm() could change the AioContext of bs_top. - -bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from -commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()"). -bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock -is taken, so let's temporarily hold the new AioContext to prevent QEMU -from failing in BDRV_POLL_WHILE when it tries to release the wrong -AioContext. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209 -Reported-by: Aihua Liang -Signed-off-by: Stefano Garzarella -Message-Id: <20230214171621.11574-1-sgarzare@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1) -Signed-off-by: Stefano Garzarella ---- - block.c | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/block.c b/block.c -index 0d78711416..9e1dcb9e47 100644 ---- a/block.c -+++ b/block.c -@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) - * child. - * - * This function does not create any image files. -+ * -+ * The caller must hold the AioContext lock for @bs_top. - */ - int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp) -@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - int ret; - BdrvChild *child; - Transaction *tran = tran_new(); -+ AioContext *old_context, *new_context = NULL; - - GLOBAL_STATE_CODE(); - - assert(!bs_new->backing); - -+ old_context = bdrv_get_aio_context(bs_top); -+ - child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", - &child_of_bds, bdrv_backing_role(bs_new), - tran, errp); -@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - goto out; - } - -+ /* -+ * bdrv_attach_child_noperm could change the AioContext of bs_top. -+ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily -+ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE -+ * that assumes the new lock is taken. 
-+ */ -+ new_context = bdrv_get_aio_context(bs_top); -+ -+ if (old_context != new_context) { -+ aio_context_release(old_context); -+ aio_context_acquire(new_context); -+ } -+ - ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); - if (ret < 0) { - goto out; -@@ -5306,6 +5324,11 @@ out: - - bdrv_refresh_limits(bs_top, NULL, NULL); - -+ if (new_context && old_context != new_context) { -+ aio_context_release(new_context); -+ aio_context_acquire(old_context); -+ } -+ - return ret; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch b/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch deleted file mode 100644 index 6b8f6a7..0000000 --- a/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch +++ /dev/null @@ -1,140 +0,0 @@ -From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:55 +0200 -Subject: [PATCH 08/20] block/vmdk: Change extent info type - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s) - -VMDK's implementation of .bdrv_get_specific_info() returns information -about its extent files, ostensibly in the form of ImageInfo objects. -However, it does not get this information through -bdrv_query_image_info(), but fills only a select few fields with custom -information that does not always match the fields' purposes. - -For example, @format, which is supposed to be a block driver name, is -filled with the extent type, e.g. SPARSE or FLAT. - -In ImageInfo, @compressed shows whether the data that can be seen in the -image is stored in compressed form or not. For example, a compressed -qcow2 image will store compressed data in its data file, but when -accessing the qcow2 node, you will see normal data. 
This is not how -VMDK uses the @compressed field for its extent files: Instead, it -signifies whether accessing the extent file will yield compressed data -(which the VMDK driver then (de-)compresses). - -Create a new structure to represent the extent information. This allows -us to clarify the fields' meanings, and it clearly shows that these are -not complete ImageInfo objects. (That is, if a user wants an extent -file's ImageInfo object, they will need to query it separately, and will -not get it from ImageInfoSpecificVmdk.extents.) - -Note that this removes the last use of ['ImageInfo'] (i.e. an array of -ImageInfo objects), so the QAPI generator will no longer generate -ImageInfoList by default. However, we use it in qemu-img.c, so we need -to create a dummy object to force the generate to create that type, -similarly to DummyForceArrays in machine.json (introduced in commit -9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array -types")). - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-4-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05) -Signed-off-by: Hanna Czenczek ---- - block/vmdk.c | 8 ++++---- - qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++- - 2 files changed, 41 insertions(+), 5 deletions(-) - -diff --git a/block/vmdk.c b/block/vmdk.c -index 26376352b9..4435b9880b 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs) - return 1; - } - --static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) -+static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent) - { -- ImageInfo *info = g_new0(ImageInfo, 1); -+ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1); - - bdrv_refresh_filename(extent->file->bs); -- *info = (ImageInfo){ -+ *info = (VmdkExtentInfo){ - .filename = g_strdup(extent->file->bs->filename), - .format = g_strdup(extent->type), - 
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE, -@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs, - int i; - BDRVVmdkState *s = bs->opaque; - ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); -- ImageInfoList **tail; -+ VmdkExtentInfoList **tail; - - *spec_info = (ImageInfoSpecific){ - .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index f5d822cbd6..4b9365167f 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -124,7 +124,33 @@ - 'create-type': 'str', - 'cid': 'int', - 'parent-cid': 'int', -- 'extents': ['ImageInfo'] -+ 'extents': ['VmdkExtentInfo'] -+ } } -+ -+## -+# @VmdkExtentInfo: -+# -+# Information about a VMDK extent file -+# -+# @filename: Name of the extent file -+# -+# @format: Extent type (e.g. FLAT or SPARSE) -+# -+# @virtual-size: Number of bytes covered by this extent -+# -+# @cluster-size: Cluster size in bytes (for non-flat extents) -+# -+# @compressed: Whether this extent contains compressed data -+# -+# Since: 8.0 -+## -+{ 'struct': 'VmdkExtentInfo', -+ 'data': { -+ 'filename': 'str', -+ 'format': 'str', -+ 'virtual-size': 'int', -+ '*cluster-size': 'int', -+ '*compressed': 'bool' - } } - - ## -@@ -5754,3 +5780,13 @@ - 'data': { 'device': 'str', '*id': 'str', '*name': 'str'}, - 'returns': 'SnapshotInfo', - 'allow-preconfig': true } -+ -+## -+# @DummyBlockCoreForceArrays: -+# -+# Not used by QMP; hack to let us use ImageInfoList internally -+# -+# Since: 8.0 -+## -+{ 'struct': 'DummyBlockCoreForceArrays', -+ 'data': { 'unused-image-info': ['ImageInfo'] } } --- -2.31.1 - diff --git a/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch b/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch new file mode 100644 index 0000000..4173648 --- /dev/null +++ b/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch @@ -0,0 +1,55 @@ +From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001 
+From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit ef56ffbdd6b0605dc1e305611287b948c970e236 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:08 2023 -0400 + + checkpatch: add qemu_bh_new/aio_bh_new checks + + Advise authors to use the _guarded versions of the APIs, instead. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-4-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + scripts/checkpatch.pl | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl +index d768171dcf..eeaec436eb 100755 +--- a/scripts/checkpatch.pl ++++ b/scripts/checkpatch.pl +@@ -2865,6 +2865,14 @@ sub process { + if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { + ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr); + } ++# recommend qemu_bh_new_guarded instead of qemu_bh_new ++ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { ++ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); ++ } ++# recommend aio_bh_new_guarded instead of aio_bh_new ++ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { ++ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); ++ } + # check for module_init(), use category-specific init macros explicitly please + if ($line =~ /^module_init\s*\(/) { + ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . 
$herecurr); +-- +2.39.3 + diff --git a/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch b/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch deleted file mode 100644 index 1a3c139..0000000 --- a/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch +++ /dev/null @@ -1,127 +0,0 @@ -From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:17 -0500 -Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel() - race - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm) - -dma_blk_cb() only takes the AioContext lock around ->io_func(). That -means the rest of dma_blk_cb() is not protected. In particular, the -DMAAIOCB field accesses happen outside the lock. - -There is a race when the main loop thread holds the AioContext lock and -invokes scsi_device_purge_requests() -> bdrv_aio_cancel() -> -dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb -field determines how cancellation proceeds. If dma_aio_cancel() sees -dbs->acb == NULL while dma_blk_cb() is still running, the request can be -completed twice (-ECANCELED and the actual return value). - -The following assertion can occur with virtio-scsi when an IOThread is -used: - - ../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed. - -Fix the race by holding the AioContext across dma_blk_cb(). Now -dma_aio_cancel() under the AioContext lock will not see -inconsistent/intermediate states. 
- -Cc: Paolo Bonzini -Reviewed-by: Eric Blake -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-3-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/scsi-disk.c | 4 +--- - softmmu/dma-helpers.c | 12 +++++++----- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 5327f93f4c..b12d8b0816 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -354,13 +354,12 @@ done: - scsi_req_unref(&r->req); - } - -+/* Called with AioContext lock held */ - static void scsi_dma_complete(void *opaque, int ret) - { - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - } - scsi_dma_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) -diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c -index 7820fec54c..2463964805 100644 ---- a/softmmu/dma-helpers.c -+++ b/softmmu/dma-helpers.c -@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret) - static void dma_blk_cb(void *opaque, int ret) - { - DMAAIOCB *dbs = (DMAAIOCB *)opaque; -+ AioContext *ctx = dbs->ctx; - dma_addr_t cur_addr, cur_len; - void *mem; - - trace_dma_blk_cb(dbs, ret); - -+ aio_context_acquire(ctx); - dbs->acb = NULL; - dbs->offset += dbs->iov.size; - - if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { - dma_complete(dbs, ret); -- return; -+ goto out; - } - dma_blk_unmap(dbs); - -@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret) - - if (dbs->iov.size == 0) { - 
trace_dma_map_wait(dbs); -- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); -+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); - cpu_register_map_client(dbs->bh); -- return; -+ goto out; - } - - if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { -@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret) - QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); - } - -- aio_context_acquire(dbs->ctx); - dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, - dma_blk_cb, dbs, dbs->io_func_opaque); -- aio_context_release(dbs->ctx); - assert(dbs->acb); -+out: -+ aio_context_release(ctx); - } - - static void dma_aio_cancel(BlockAIOCB *acb) --- -2.39.1 - diff --git a/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch b/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch deleted file mode 100644 index dd77648..0000000 --- a/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 07/12] edu: add smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a -Author: Paolo Bonzini -Date: Thu Mar 2 11:16:13 2023 +0100 - - edu: add smp_mb__after_rmw() - - Ensure ordering between clearing the COMPUTING flag and checking - IRQFACT, and between setting the IRQFACT flag and checking - COMPUTING. This ensures that no wakeups are lost. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - hw/misc/edu.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/misc/edu.c b/hw/misc/edu.c -index e935c418d4..a1f8bc77e7 100644 ---- a/hw/misc/edu.c -+++ b/hw/misc/edu.c -@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val, - case 0x20: - if (val & EDU_STATUS_IRQFACT) { - qatomic_or(&edu->status, EDU_STATUS_IRQFACT); -+ /* Order check of the COMPUTING flag after setting IRQFACT. */ -+ smp_mb__after_rmw(); - } else { - qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT); - } -@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque) - qemu_mutex_unlock(&edu->thr_mutex); - qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING); - -+ /* Clear COMPUTING flag before checking IRQFACT. */ -+ smp_mb__after_rmw(); -+ - if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { - qemu_mutex_lock_iothread(); - edu_raise_irq(edu, FACT_IRQ); --- -2.39.1 - diff --git a/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch b/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch new file mode 100644 index 0000000..77086e5 --- /dev/null +++ b/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch @@ -0,0 +1,153 @@ +From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:32 +0200 +Subject: [PATCH 02/21] graph-lock: Disable locking for now + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm) + +In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They +come from callers that hold an AioContext lock, which is not allowed +during polling. 
In theory, we could temporarily release the lock, but +callers are inconsistent about whether they hold a lock, and if they do, +some are also confused about which one they hold. While all of this is +fixable, it's not trivial, and the best course of action for 8.0.1 is +probably just disabling the graph locking code temporarily. + +We don't currently rely on graph locking yet. It is supposed to replace +the AioContext lock eventually to enable multiqueue support, but as long +as we still have the AioContext lock, it is sufficient without the graph +lock. Once the AioContext lock goes away, the deadlock doesn't exist any +more either and this commit can be reverted. (Of course, it can also be +reverted while the AioContext lock still exists if the callers have been +fixed.) + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-2-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d) +Signed-off-by: Kevin Wolf +--- + block/graph-lock.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 259a7a0bde..2490926c90 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; + /* Protects the list of aiocontext and orphaned_reader_count */ + static QemuMutex aio_context_list_lock; + ++#if 0 + /* Written and read with atomic operations. */ + static int has_writer; ++#endif + + /* + * A reader coroutine could move from an AioContext to another. 
+@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) + g_free(ctx->bdrv_graph); + } + ++#if 0 + static uint32_t reader_count(void) + { + BdrvGraphRWlock *brdv_graph; +@@ -105,10 +108,17 @@ static uint32_t reader_count(void) + assert((int32_t)rd >= 0); + return rd; + } ++#endif + + void bdrv_graph_wrlock(void) + { + GLOBAL_STATE_CODE(); ++ /* ++ * TODO Some callers hold an AioContext lock when this is called, which ++ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or ++ * AioContext locks are gone). ++ */ ++#if 0 + assert(!qatomic_read(&has_writer)); + + /* Make sure that constantly arriving new I/O doesn't cause starvation */ +@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) + } while (reader_count() >= 1); + + bdrv_drain_all_end(); ++#endif + } + + void bdrv_graph_wrunlock(void) + { + GLOBAL_STATE_CODE(); ++#if 0 + QEMU_LOCK_GUARD(&aio_context_list_lock); + assert(qatomic_read(&has_writer)); + +@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void) + + /* Wake up all coroutine that are waiting to read the graph */ + qemu_co_enter_all(&reader_queue, &aio_context_list_lock); ++#endif + } + + void coroutine_fn bdrv_graph_co_rdlock(void) + { ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + +@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void) + qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); + } + } ++#endif + } + + void coroutine_fn bdrv_graph_co_rdunlock(void) + { ++#if 0 + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + +@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) + if (qatomic_read(&has_writer)) { + aio_wait_kick(); + } ++#endif + } + + void bdrv_graph_rdlock_main_loop(void) +@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void) + void assert_bdrv_graph_readable(void) + { + /* reader_count() is slow due to aio_context_list_lock lock 
contention */ ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + #ifdef CONFIG_DEBUG_GRAPH_LOCK + assert(qemu_in_main_thread() || reader_count()); + #endif ++#endif + } + + void assert_bdrv_graph_writable(void) + { + assert(qemu_in_main_thread()); ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + assert(qatomic_read(&has_writer)); ++#endif + } +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch b/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch new file mode 100644 index 0000000..67e702c --- /dev/null +++ b/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch @@ -0,0 +1,40 @@ +From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001 +From: Ani Sinha +Date: Tue, 2 May 2023 15:51:53 +0530 +Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines + version 7.6 and above + +RH-Author: Ani Sinha +RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 +RH-Bugzilla: 1934134 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm) + +Please look at QEMU upstream commit +1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3") +This patch adapts the above change so that it applies to RHEL pc machines of +version 7.6 and newer. These are the machine types that are currently supported +in RHEL. Q35 machines are not affected. 
+ +Signed-off-by: Ani Sinha +--- + hw/i386/pc_piix.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 4d5880e249..6c7be628e1 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; ++ pcmc->resizable_acpi_blob = true; + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; +-- +2.39.1 + diff --git a/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch b/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch new file mode 100644 index 0000000..e06113a --- /dev/null +++ b/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch @@ -0,0 +1,101 @@ +From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001 +From: Ani Sinha +Date: Wed, 29 Mar 2023 10:27:26 +0530 +Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines + older than version 2.3 + +RH-Author: Ani Sinha +RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 +RH-Bugzilla: 1934134 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm) + +i440fx machine versions 2.3 and newer support dynamic ram +resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable"). +All currently supported q35 machine types (versions 2.4 and newer) support +resizable RAM/ROM blocks. Therefore the warning generated when the ACPI table +size exceeds a pre-defined value does not apply to those machine versions. +Add a check limiting the warning message to only those machines that do not +support expandable ram blocks (that is, i440fx machines with version 2.2 +and older).
+ +Signed-off-by: Ani Sinha +Message-Id: <20230329045726.14028-1-anisinha@redhat.com> +Reviewed-by: Igor Mammedov +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074) +--- + hw/i386/acpi-build.c | 6 ++++-- + hw/i386/pc.c | 1 + + hw/i386/pc_piix.c | 1 + + include/hw/i386/pc.h | 3 +++ + 4 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index ec857a117e..9bc4d8a981 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + int legacy_table_size = + ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, + ACPI_BUILD_ALIGN_SIZE); +- if (tables_blob->len > legacy_table_size) { ++ if ((tables_blob->len > legacy_table_size) && ++ !pcmc->resizable_acpi_blob) { + /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", +@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + g_array_set_size(tables_blob, legacy_table_size); + } else { + /* Make sure we have a buffer in case we need to resize the tables. */ +- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { ++ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) && ++ !pcmc->resizable_acpi_blob) { + /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. 
*/ + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index f216922cee..7db5a2348f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->acpi_data_size = 0x20000 + 0x8000; + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; ++ pcmc->resizable_acpi_blob = true; + assert(!mc->get_hotplug_handler); + mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index fc704d783f..4d5880e249 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len); + compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len); + pcmc->rsdp_in_ram = false; ++ pcmc->resizable_acpi_blob = false; + } + + DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index d218ad1628..2f514d13d8 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -130,6 +130,9 @@ struct PCMachineClass { + + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; ++ ++ /* resizable acpi blob compat */ ++ bool resizable_acpi_blob; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +-- +2.39.1 + diff --git a/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch b/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch new file mode 100644 index 0000000..e96bb10 --- /dev/null +++ b/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch @@ -0,0 +1,60 @@ +From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary + 
+RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 + +There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'. +Both of them are required to follow the cluster-NUMA-node boundary. +Enable the validation to warn about the irregular configuration where +multiple CPUs in one cluster have been associated with different NUMA +nodes. + +Signed-off-by: Gavin Shan +Acked-by: Igor Mammedov +Message-Id: <20230509002739.18388-3-gshan@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb) +Signed-off-by: Gavin Shan +--- + hw/arm/sbsa-ref.c | 2 ++ + hw/arm/virt.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c +index 0b93558dde..efb380e7c8 100644 +--- a/hw/arm/sbsa-ref.c ++++ b/hw/arm/sbsa-ref.c +@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) + mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; + mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; + mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + } + + static const TypeInfo sbsa_ref_info = { +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 9be53e9355..df6a0231bc 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, 
"acpi", "OnOffAuto", +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch b/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch new file mode 100644 index 0000000..3bbe93f --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch @@ -0,0 +1,166 @@ +From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Tue, 25 Jul 2023 10:56:51 +0100 +Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes +RH-Bugzilla: 2229133 +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu +RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 + +The implementation of the SMMUv3 has multiple places where it reads a +data structure from the guest and directly operates on it without +doing a guest-to-host endianness conversion. Since all SMMU data +structures are little-endian, this means that the SMMU doesn't work +on a big-endian host. In particular, this causes the Avocado test + machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max +to fail on an s390x host. + +Add appropriate byte-swapping on reads and writes of guest in-memory +data structures so that the device works correctly on big-endian +hosts. + +As part of this we constrain queue_read() to operate only on Cmd +structs and queue_write() on Evt structs, because in practice these +are the only data structures the two functions are used with, and we +need to know what the data structure is to be able to byte-swap its +parts correctly. 
+ +Signed-off-by: Peter Maydell +Tested-by: Thomas Huth +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Auger +Message-id: 20230717132641.764660-1-peter.maydell@linaro.org +Cc: qemu-stable@nongnu.org +(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3) +Signed-off-by: Eric Auger +--- + hw/arm/smmu-common.c | 3 +-- + hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++-------- + 2 files changed, 32 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index e7f1c1f219..daa02ce798 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte, + dma_addr_t addr = baseaddr + index * sizeof(*pte); + + /* TODO: guarantee 64-bit single-copy atomicity */ +- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte), +- MEMTXATTRS_UNSPECIFIED); ++ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED); + + if (ret != MEMTX_OK) { + info->type = SMMU_PTW_ERR_WALK_EABT; +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 270c80b665..cfb56725a6 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn) + trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn); + } + +-static inline MemTxResult queue_read(SMMUQueue *q, void *data) ++static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd) + { + dma_addr_t addr = Q_CONS_ENTRY(q); ++ MemTxResult ret; ++ int i; + +- return dma_memory_read(&address_space_memory, addr, data, q->entry_size, +- MEMTXATTRS_UNSPECIFIED); ++ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd), ++ MEMTXATTRS_UNSPECIFIED); ++ if (ret != MEMTX_OK) { ++ return ret; ++ } ++ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) { ++ le32_to_cpus(&cmd->word[i]); ++ } ++ return ret; + } + +-static MemTxResult queue_write(SMMUQueue *q, void *data) ++static MemTxResult 
queue_write(SMMUQueue *q, Evt *evt_in) + { + dma_addr_t addr = Q_PROD_ENTRY(q); + MemTxResult ret; ++ Evt evt = *evt_in; ++ int i; + +- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size, ++ for (i = 0; i < ARRAY_SIZE(evt.word); i++) { ++ cpu_to_le32s(&evt.word[i]); ++ } ++ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt), + MEMTXATTRS_UNSPECIFIED); + if (ret != MEMTX_OK) { + return ret; +@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s) + static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, + SMMUEventInfo *event) + { +- int ret; ++ int ret, i; + + trace_smmuv3_get_ste(addr); + /* TODO: guarantee 64-bit single-copy atomicity */ +@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, + event->u.f_ste_fetch.addr = addr; + return -EINVAL; + } ++ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { ++ le32_to_cpus(&buf->word[i]); ++ } + return 0; + + } +@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, + CD *buf, SMMUEventInfo *event) + { + dma_addr_t addr = STE_CTXPTR(ste); +- int ret; ++ int ret, i; + + trace_smmuv3_get_cd(addr); + /* TODO: guarantee 64-bit single-copy atomicity */ +@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, + event->u.f_ste_fetch.addr = addr; + return -EINVAL; + } ++ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { ++ le32_to_cpus(&buf->word[i]); ++ } + return 0; + } + +@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, + return -EINVAL; + } + if (s->features & SMMU_FEATURE_2LVL_STE) { +- int l1_ste_offset, l2_ste_offset, max_l2_ste, span; ++ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i; + dma_addr_t l1ptr, l2ptr; + STEDesc l1std; + +@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, + event->u.f_ste_fetch.addr = l1ptr; + return -EINVAL; + } ++ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) { ++ 
le32_to_cpus(&l1std.word[i]); ++ } + + span = L1STD_SPAN(&l1std); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch b/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch deleted file mode 100644 index bc65e2f..0000000 --- a/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -After the improvement to high memory region address assignment is -applied, the memory layout can be changed, introducing possible -migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region -is disabled or enabled when the optimization is applied or not, with -the following configuration. The configuration is only achievable by -modifying the source code until more properties are added to allow -users selectively disable those high memory regions. 
- - pa_bits = 40; - vms->highmem_redists = false; - vms->highmem_ecam = false; - vms->highmem_mmio = true; - - # qemu-system-aarch64 -accel kvm -cpu host \ - -machine virt-7.2,compact-highmem={on, off} \ - -m 4G,maxmem=511G -monitor stdio - - Region compact-highmem=off compact-highmem=on - ---------------------------------------------------------------- - MEM [1GB 512GB] [1GB 512GB] - HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled] - HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled] - HIGH_PCIE_MMIO [disabled] [512GB 1TB] - -In order to keep backwords compatibility, we need to disable the -optimization on machine, which is virt-7.1 or ealier than it. It -means the optimization is enabled by default from virt-7.2. Besides, -'compact-highmem' property is added so that the optimization can be -explicitly enabled or disabled on all machine types by users. - -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-7-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0) -Signed-off-by: Gavin Shan -Conflicts: - hw/arm/virt.c - Comment out the handlers of property 'compact-highmem' since - the property isn't exposed. ---- - docs/system/arm/virt.rst | 4 ++++ - hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++ - include/hw/arm/virt.h | 1 + - 3 files changed, 39 insertions(+) - -diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst -index 20442ea2c1..4454706392 100644 ---- a/docs/system/arm/virt.rst -+++ b/docs/system/arm/virt.rst -@@ -94,6 +94,10 @@ highmem - address space above 32 bits. The default is ``on`` for machine types - later than ``virt-2.12``. - -+compact-highmem -+ Set ``on``/``off`` to enable/disable the compact layout for high memory regions. -+ The default is ``on`` for machine types later than ``virt-7.2``. 
-+ - gic-version - Specify the version of the Generic Interrupt Controller (GIC) to provide. - Valid values are: -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6896e0ca0f..6087511ae9 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = { - * Note the extended_memmap is sized so that it eventually also includes the - * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last - * index of base_memmap). -+ * -+ * The memory map for these Highmem IO Regions can be in legacy or compact -+ * layout, depending on 'compact-highmem' property. With legacy layout, the -+ * PA space for one specific region is always reserved, even if the region -+ * has been disabled or doesn't fit into the PA space. However, the PA space -+ * for the region won't be reserved in these circumstances with compact layout. - */ - static MemMapEntry extended_memmap[] = { - /* Additional 64 MB redist region (can contain up to 512 redistributors) */ -@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) - vms->highmem = value; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ -+static bool virt_get_compact_highmem(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_compact; -+} -+ -+static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_compact = value; -+} -+#endif /* disabled for RHEL */ -+ - static bool virt_get_its(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable using " - "physical address space above 32 bits"); - -+ object_class_property_add_bool(oc, "compact-highmem", -+ virt_get_compact_highmem, -+ virt_set_compact_highmem); -+ object_class_property_set_description(oc, 
"compact-highmem", -+ "Set on/off to enable/disable compact " -+ "layout for high memory regions"); -+ - object_class_property_add_str(oc, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_class_property_set_description(oc, "gic-version", -@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj) - - /* High memory is enabled by default */ - vms->highmem = true; -+ vms->highmem_compact = !vmc->no_highmem_compact; - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - - vms->highmem_ecam = !vmc->no_highmem_ecam; -@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2) - - static void virt_machine_7_1_options(MachineClass *mc) - { -+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -+ - virt_machine_7_2_options(mc); - compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len); -+ /* Compact layout for high memory regions was introduced with 7.2 */ -+ vmc->no_highmem_compact = true; - } - DEFINE_VIRT_MACHINE(7, 1) - -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 15bd291311..85e7d61868 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -125,6 +125,7 @@ struct VirtMachineClass { - bool no_pmu; - bool claim_edge_triggered_timers; - bool smbios_old_sys_ver; -+ bool no_highmem_compact; - bool no_highmem_ecam; - bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */ - bool kvm_no_adjvtime; --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch deleted file mode 100644 index df691a7..0000000 --- a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory - regions - -RH-Author: Gavin Shan -RH-MergeRequest: 126: 
hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -The 3 high memory regions are usually enabled by default, but they may -be not used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2. -This leads to waste in the PA space. - -Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to -allow users selectively disable them if needed. After that, the high -memory region for GICv3 or GICv4 redistributor can be disabled by user, -the number of maximal supported CPUs needs to be calculated based on -'vms->highmem_redists'. The follow-up error message is also improved -to indicate if the high memory region for GICv3 and GICv4 has been -enabled or not. - -Suggested-by: Marc Zyngier -Signed-off-by: Gavin Shan -Reviewed-by: Marc Zyngier -Reviewed-by: Cornelia Huck -Reviewed-by: Eric Auger -Message-id: 20221029224307.138822-8-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368) -Signed-off-by: Gavin Shan -Conflicts: - hw/arm/virt.c - Comment out the handlers of the property 'highmem-redists', - 'highmem-ecam' and 'highmem-mmio' since they aren't exposed. ---- - docs/system/arm/virt.rst | 13 +++++++ - hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++-- - 2 files changed, 86 insertions(+), 2 deletions(-) - -diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst -index 4454706392..188a4f211f 100644 ---- a/docs/system/arm/virt.rst -+++ b/docs/system/arm/virt.rst -@@ -98,6 +98,19 @@ compact-highmem - Set ``on``/``off`` to enable/disable the compact layout for high memory regions. - The default is ``on`` for machine types later than ``virt-7.2``. 
- -+highmem-redists -+ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or -+ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will -+ limit the maximum number of CPUs when GICv3 or GICv4 is used. -+ -+highmem-ecam -+ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM. -+ The default is ``on`` for machine types later than ``virt-3.0``. -+ -+highmem-mmio -+ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO. -+ The default is ``on``. -+ - gic-version - Specify the version of the Generic Interrupt Controller (GIC) to provide. - Valid values are: -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6087511ae9..304fa0d6e7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine) - if (vms->gic_version == VIRT_GIC_VERSION_2) { - virt_max_cpus = GIC_NCPU; - } else { -- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) + -- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); -+ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST); -+ if (vms->highmem_redists) { -+ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); -+ } - } - - if (max_cpus > virt_max_cpus) { - error_report("Number of SMP CPUs requested (%d) exceeds max CPUs " - "supported by machine 'mach-virt' (%d)", - max_cpus, virt_max_cpus); -+ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) { -+ error_printf("Try 'highmem-redists=on' for more CPUs\n"); -+ } -+ - exit(1); - } - -@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) - - vms->highmem_compact = value; - } -+ -+static bool virt_get_highmem_redists(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_redists; -+} -+ -+static void virt_set_highmem_redists(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_redists = 
value; -+} -+ -+static bool virt_get_highmem_ecam(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_ecam; -+} -+ -+static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_ecam = value; -+} -+ -+static bool virt_get_highmem_mmio(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_mmio; -+} -+ -+static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_mmio = value; -+} -+ - #endif /* disabled for RHEL */ - - static bool virt_get_its(Object *obj, Error **errp) -@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable compact " - "layout for high memory regions"); - -+ object_class_property_add_bool(oc, "highmem-redists", -+ virt_get_highmem_redists, -+ virt_set_highmem_redists); -+ object_class_property_set_description(oc, "highmem-redists", -+ "Set on/off to enable/disable high " -+ "memory region for GICv3 or GICv4 " -+ "redistributor"); -+ -+ object_class_property_add_bool(oc, "highmem-ecam", -+ virt_get_highmem_ecam, -+ virt_set_highmem_ecam); -+ object_class_property_set_description(oc, "highmem-ecam", -+ "Set on/off to enable/disable high " -+ "memory region for PCI ECAM"); -+ -+ object_class_property_add_bool(oc, "highmem-mmio", -+ virt_get_highmem_mmio, -+ virt_set_highmem_mmio); -+ object_class_property_set_description(oc, "highmem-mmio", -+ "Set on/off to enable/disable high " -+ "memory region for PCI MMIO"); -+ - object_class_property_add_str(oc, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_class_property_set_description(oc, "gic-version", --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch 
b/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch deleted file mode 100644 index 6b20bb8..0000000 --- a/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address - assignment for 9.2.0 machine - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/8] beda1791c0c35dce5c669efd47685302b8468032 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 -Upstream: RHEL only - -The compact high memory region address assignment is enabled for 9.2.0, -but it's kept as disabled for 9.0.0, to keep the backwards compatibility -on 9.0.0. Note that these newly added properties ('compact-highmem', -'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream -aren't exposed for the downstream. 
- -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 304fa0d6e7..e41c0b462c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj) - - /* High memory is enabled by default */ - vms->highmem = true; -+ vms->highmem_compact = !vmc->no_highmem_compact; - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - - vms->highmem_ecam = !vmc->no_highmem_ecam; -@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc) - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; -+ /* Compact layout for high memory regions was introduced with 9.2.0 */ -+ vmc->no_highmem_compact = true; - } - DEFINE_RHEL_MACHINE(9, 0, 0) --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch b/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch deleted file mode 100644 index 9dcdf61..0000000 --- a/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address - assignment - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -There are three high memory regions, which are VIRT_HIGH_REDIST2, -VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses -are floating on highest RAM address. However, they can be disabled -in several cases. 
- -(1) One specific high memory region is likely to be disabled by - code by toggling vms->highmem_{redists, ecam, mmio}. - -(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is - 'virt-2.12' or ealier than it. - -(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded - on 32-bits system. - -(4) One specific high memory region is disabled when it breaks the - PA space limit. - -The current implementation of virt_set_{memmap, high_memmap}() isn't -optimized because the high memory region's PA space is always reserved, -regardless of whatever the actual state in the corresponding -vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and -'vms->highest_gpa' are always increased for case (1), (2) and (3). -It's unnecessary since the assigned PA space for the disabled high -memory region won't be used afterwards. - -Improve the address assignment for those three high memory region by -skipping the address assignment for one specific high memory region if -it has been disabled in case (1), (2) and (3). The memory layout may -be changed after the improvement is applied, which leads to potential -migration breakage. So 'vms->highmem_compact' is added to control if -the improvement should be applied. For now, 'vms->highmem_compact' is -set to false, meaning that we don't have memory layout change until it -becomes configurable through property 'compact-highmem' in next patch. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-6-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 15 ++++++++++----- - include/hw/arm/virt.h | 1 + - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6e3b9fc060..6896e0ca0f 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms, - vms->memmap[i].size = region_size; - - /* -- * Check each device to see if they fit in the PA space, -- * moving highest_gpa as we go. -+ * Check each device to see if it fits in the PA space, -+ * moving highest_gpa as we go. For compatibility, move -+ * highest_gpa for disabled fitting devices as well, if -+ * the compact layout has been disabled. - * - * For each device that doesn't fit, disable it. 
- */ - fits = (region_base + region_size) <= BIT_ULL(pa_bits); -- if (fits) { -- vms->highest_gpa = region_base + region_size - 1; -+ *region_enabled &= fits; -+ if (vms->highmem_compact && !*region_enabled) { -+ continue; - } - -- *region_enabled &= fits; - base = region_base + region_size; -+ if (fits) { -+ vms->highest_gpa = base - 1; -+ } - } - } - -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 22b54ec510..15bd291311 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -144,6 +144,7 @@ struct VirtMachineState { - PFlashCFI01 *flash[2]; - bool secure; - bool highmem; -+ bool highmem_compact; - bool highmem_ecam; - bool highmem_mmio; - bool highmem_redists; --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch b/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch deleted file mode 100644 index ea9cb1f..0000000 --- a/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in - virt_set_high_memmap() - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces variable 'region_base' for the base address of the -specific high memory region. It's the preparatory work to optimize -high memory region address assignment. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-4-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ca098d40b8..ddcf7ee2f8 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { -- hwaddr region_size; -+ hwaddr region_base, region_size; - bool fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ region_base = ROUND_UP(base, extended_memmap[i].size); - region_size = extended_memmap[i].size; - -- base = ROUND_UP(base, region_size); -- vms->memmap[i].base = base; -+ vms->memmap[i].base = region_base; - vms->memmap[i].size = region_size; - - /* -@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, - * - * For each device that doesn't fit, disable it. 
- */ -- fits = (base + region_size) <= BIT_ULL(pa_bits); -+ fits = (region_base + region_size) <= BIT_ULL(pa_bits); - if (fits) { -- vms->highest_gpa = base + region_size - 1; -+ vms->highest_gpa = region_base + region_size - 1; - } - - switch (i) { -@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - break; - } - -- base += region_size; -+ base = region_base + region_size; - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch b/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch deleted file mode 100644 index 659faeb..0000000 --- a/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch +++ /dev/null @@ -1,95 +0,0 @@ -From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled() - helper - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces virt_get_high_memmap_enabled() helper, which returns -the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will -be used in the subsequent patches. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-5-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 32 +++++++++++++++++++------------- - 1 file changed, 19 insertions(+), 13 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ddcf7ee2f8..6e3b9fc060 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - return arm_cpu_mp_affinity(idx, clustersz); - } - -+static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, -+ int index) -+{ -+ bool *enabled_array[] = { -+ &vms->highmem_redists, -+ &vms->highmem_ecam, -+ &vms->highmem_mmio, -+ }; -+ -+ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == -+ ARRAY_SIZE(enabled_array)); -+ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array)); -+ -+ return enabled_array[index - VIRT_LOWMEMMAP_LAST]; -+} -+ - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { - hwaddr region_base, region_size; -- bool fits; -+ bool *region_enabled, fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ region_enabled = virt_get_high_memmap_enabled(vms, i); - region_base = ROUND_UP(base, extended_memmap[i].size); - region_size = extended_memmap[i].size; - -@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - vms->highest_gpa = region_base + region_size - 1; - } - -- switch (i) { -- case VIRT_HIGH_GIC_REDIST2: -- vms->highmem_redists &= fits; -- break; -- case VIRT_HIGH_PCIE_ECAM: -- vms->highmem_ecam &= fits; -- break; -- case VIRT_HIGH_PCIE_MMIO: -- vms->highmem_mmio &= fits; -- break; -- } -- -+ *region_enabled &= fits; - base = region_base + region_size; - } - } --- -2.31.1 - 
diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch b/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch deleted file mode 100644 index f55c06a..0000000 --- a/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch +++ /dev/null @@ -1,130 +0,0 @@ -From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces virt_set_high_memmap() helper. The logic of high -memory region address assignment is moved to the helper. The intention -is to make the subsequent optimization for high memory region address -assignment easier. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-2-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 74 ++++++++++++++++++++++++++++----------------------- - 1 file changed, 41 insertions(+), 33 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bf18838b87..bea5f54720 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - return arm_cpu_mp_affinity(idx, clustersz); - } - -+static void virt_set_high_memmap(VirtMachineState *vms, -+ hwaddr base, int pa_bits) -+{ -+ int i; -+ -+ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ hwaddr size = extended_memmap[i].size; -+ bool fits; -+ -+ base = ROUND_UP(base, size); -+ vms->memmap[i].base = base; -+ vms->memmap[i].size = size; -+ -+ /* -+ * Check each device to see if they fit in the PA space, -+ * moving highest_gpa as we go. -+ * -+ * For each device that doesn't fit, disable it. 
-+ */ -+ fits = (base + size) <= BIT_ULL(pa_bits); -+ if (fits) { -+ vms->highest_gpa = base + size - 1; -+ } -+ -+ switch (i) { -+ case VIRT_HIGH_GIC_REDIST2: -+ vms->highmem_redists &= fits; -+ break; -+ case VIRT_HIGH_PCIE_ECAM: -+ vms->highmem_ecam &= fits; -+ break; -+ case VIRT_HIGH_PCIE_MMIO: -+ vms->highmem_mmio &= fits; -+ break; -+ } -+ -+ base += size; -+ } -+} -+ - static void virt_set_memmap(VirtMachineState *vms, int pa_bits) - { - MachineState *ms = MACHINE(vms); -@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) - /* We know for sure that at least the memory fits in the PA space */ - vms->highest_gpa = memtop - 1; - -- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -- hwaddr size = extended_memmap[i].size; -- bool fits; -- -- base = ROUND_UP(base, size); -- vms->memmap[i].base = base; -- vms->memmap[i].size = size; -- -- /* -- * Check each device to see if they fit in the PA space, -- * moving highest_gpa as we go. -- * -- * For each device that doesn't fit, disable it. 
-- */ -- fits = (base + size) <= BIT_ULL(pa_bits); -- if (fits) { -- vms->highest_gpa = base + size - 1; -- } -- -- switch (i) { -- case VIRT_HIGH_GIC_REDIST2: -- vms->highmem_redists &= fits; -- break; -- case VIRT_HIGH_PCIE_ECAM: -- vms->highmem_ecam &= fits; -- break; -- case VIRT_HIGH_PCIE_MMIO: -- vms->highmem_mmio &= fits; -- break; -- } -- -- base += size; -- } -+ virt_set_high_memmap(vms, base, pa_bits); - - if (device_memory_size > 0) { - ms->device_memory = g_malloc0(sizeof(*ms->device_memory)); --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch b/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch deleted file mode 100644 index 27bc6bb..0000000 --- a/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch +++ /dev/null @@ -1,83 +0,0 @@ -From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in - virt_set_high_memmap() - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This renames variable 'size' to 'region_size' in virt_set_high_memmap(). -Its counterpart ('region_base') will be introduced in next patch. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-3-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bea5f54720..ca098d40b8 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { -+ hwaddr region_size; -+ bool fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -- hwaddr size = extended_memmap[i].size; -- bool fits; -+ region_size = extended_memmap[i].size; - -- base = ROUND_UP(base, size); -+ base = ROUND_UP(base, region_size); - vms->memmap[i].base = base; -- vms->memmap[i].size = size; -+ vms->memmap[i].size = region_size; - - /* - * Check each device to see if they fit in the PA space, -@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, - * - * For each device that doesn't fit, disable it. 
- */ -- fits = (base + size) <= BIT_ULL(pa_bits); -+ fits = (base + region_size) <= BIT_ULL(pa_bits); - if (fits) { -- vms->highest_gpa = base + size - 1; -+ vms->highest_gpa = base + region_size - 1; - } - - switch (i) { -@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - break; - } - -- base += size; -+ base += region_size; - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch b/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch new file mode 100644 index 0000000..42ec705 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch @@ -0,0 +1,41 @@ +From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for + RHEL machines + +RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 +Upstream Status: RHEL only + +Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of +CPU cluster and NUMA node will be validated for 'virt-rhel*' machines. +A warning message will be printed if the boundary is broken. 
+ +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index df6a0231bc..faf68488d5 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, "acpi", "OnOffAuto", +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch new file mode 100644 index 0000000..fe9cd8c --- /dev/null +++ b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch @@ -0,0 +1,44 @@ +From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 25 Jul 2023 15:34:45 -0300 +Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type + <= pc-q35-rhel9.2.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0 +RH-Bugzilla: 2223691 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm) + +This is a downstream-only patch to that sets off the property +x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing +live migrations to RHEL9.2 happen successfully. 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691 +Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine +type < 8.0") +Signed-off-by: Leonardo Bras +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5ea52317b9..6f5117669d 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = { + { "virtio-mem", "x-early-migration", "false" }, + /* hw_compat_rhel_9_2 from hw_compat_7_2 */ + { "migration", "x-preempt-pre-7-2", "true" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, + }; + const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch new file mode 100644 index 0000000..164bea7 --- /dev/null +++ b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch @@ -0,0 +1,118 @@ +From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 2 May 2023 21:27:02 -0300 +Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine + type < 8.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 +RH-Bugzilla: 2189423 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm) + +Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK +set for machine types < 8.0 will cause migration to fail if the target +QEMU version is < 8.0.0 : + +qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0 
+qemu-system-x86_64: Failed to load PCIDevice:config +qemu-system-x86_64: Failed to load e1000e:parent_obj +qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e' +qemu-system-x86_64: load of migration failed: Invalid argument + +The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0, +with this cmdline: + +./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX] + +In order to fix this, property x-pcie-err-unc-mask was introduced to +control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by +default, but is disabled if machine type <= 7.2. + +Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register") +Suggested-by: Michael S. Tsirkin +Signed-off-by: Leonardo Bras +Message-Id: <20230503002701.854329-1-leobras@redhat.com> +Reviewed-by: Jonathan Cameron +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576 +Tested-by: Fiona Ebner +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f) +Signed-off-by: Leonardo Bras +--- + hw/core/machine.c | 1 + + hw/pci/pci.c | 2 ++ + hw/pci/pcie_aer.c | 11 +++++++---- + include/hw/pci/pci.h | 2 ++ + 4 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0e0120b7f2..c28702b690 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = { + { "e1000e", "migrate-timadj", "off" }, + { "virtio-mem", "x-early-migration", "false" }, + { "migration", "x-preempt-pre-7-2", "true" }, ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, + }; + const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index def5000e7b..8ad4349e96 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -79,6 +79,8 @@ static Property pci_props[] = { + DEFINE_PROP_STRING("failover_pair_id", PCIDevice, + failover_pair_id), + DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), ++ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, ++ QEMU_PCIE_ERR_UNC_MASK_BITNR, true), + DEFINE_PROP_END_OF_LIST() + }; + +diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c +index 103667c368..374d593ead 100644 +--- a/hw/pci/pcie_aer.c ++++ b/hw/pci/pcie_aer.c +@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, + + pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, + PCI_ERR_UNC_SUPPORTED); +- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- PCI_ERR_UNC_MASK_DEFAULT); +- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- PCI_ERR_UNC_SUPPORTED); ++ ++ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { ++ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ PCI_ERR_UNC_MASK_DEFAULT); ++ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ PCI_ERR_UNC_SUPPORTED); ++ } + + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, + 
PCI_ERR_UNC_SEVERITY_DEFAULT); +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index d5a40cd058..6dc6742fc4 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -207,6 +207,8 @@ enum { + QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), + #define QEMU_PCIE_CXL_BITNR 10 + QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), ++#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 ++ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), + }; + + typedef struct PCIINTxRoute { +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch b/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch new file mode 100644 index 0000000..08ee94f --- /dev/null +++ b/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch @@ -0,0 +1,470 @@ +From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with + qemu_bh_new_guarded + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit f63192b0544af5d3e4d5edfd85ab520fcf671377 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:09 2023 -0400 + + hw: replace most qemu_bh_new calls with qemu_bh_new_guarded + + This protects devices from bh->mmio reentrancy issues. + + Thanks: Thomas Huth for diagnosing OS X test failure. + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Michael S. 
Tsirkin + Reviewed-by: Paul Durrant + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-5-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/9pfs/xen-9p-backend.c | 5 ++++- + hw/block/dataplane/virtio-blk.c | 3 ++- + hw/block/dataplane/xen-block.c | 5 +++-- + hw/char/virtio-serial-bus.c | 3 ++- + hw/display/qxl.c | 9 ++++++--- + hw/display/virtio-gpu.c | 6 ++++-- + hw/ide/ahci.c | 3 ++- + hw/ide/ahci_internal.h | 1 + + hw/ide/core.c | 4 +++- + hw/misc/imx_rngc.c | 6 ++++-- + hw/misc/macio/mac_dbdma.c | 2 +- + hw/net/virtio-net.c | 3 ++- + hw/nvme/ctrl.c | 6 ++++-- + hw/scsi/mptsas.c | 3 ++- + hw/scsi/scsi-bus.c | 3 ++- + hw/scsi/vmw_pvscsi.c | 3 ++- + hw/usb/dev-uas.c | 3 ++- + hw/usb/hcd-dwc2.c | 3 ++- + hw/usb/hcd-ehci.c | 3 ++- + hw/usb/hcd-uhci.c | 2 +- + hw/usb/host-libusb.c | 6 ++++-- + hw/usb/redirect.c | 6 ++++-- + hw/usb/xen-usb.c | 3 ++- + hw/virtio/virtio-balloon.c | 5 +++-- + hw/virtio/virtio-crypto.c | 3 ++- + 25 files changed, 66 insertions(+), 33 deletions(-) + +diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c +index 74f3a05f88..0e266c552b 100644 +--- a/hw/9pfs/xen-9p-backend.c ++++ b/hw/9pfs/xen-9p-backend.c +@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { + + int num_rings; + Xen9pfsRing *rings; ++ MemReentrancyGuard mem_reentrancy_guard; + } Xen9pfsDev; + + static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); +@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) + xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + + XEN_FLEX_RING_SIZE(ring_order); + +- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); ++ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, ++ &xen_9pdev->rings[i], ++ &xen_9pdev->mem_reentrancy_guard); + xen_9pdev->rings[i].out_cons = 0; + xen_9pdev->rings[i].out_size = 0; + xen_9pdev->rings[i].inprogress = false; +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 
b28d81737e..a6202997ee 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + } else { + s->ctx = qemu_get_aio_context(); + } +- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); ++ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, ++ &DEVICE(vdev)->mem_reentrancy_guard); + s->batch_notify_vqs = bitmap_new(conf->num_queues); + + *dataplane = s; +diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c +index 734da42ea7..d8bc39d359 100644 +--- a/hw/block/dataplane/xen-block.c ++++ b/hw/block/dataplane/xen-block.c +@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, + } else { + dataplane->ctx = qemu_get_aio_context(); + } +- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, +- dataplane); ++ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, ++ dataplane, ++ &DEVICE(xendev)->mem_reentrancy_guard); + + return dataplane; + } +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index 7d4601cb5d..dd619f0731 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) + return; + } + +- port->bh = qemu_bh_new(flush_queued_data_bh, port); ++ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, ++ &dev->mem_reentrancy_guard); + port->elem = NULL; + } + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 80ce1e9a93..f1c0eb7dfc 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) + + qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); + +- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl); ++ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, ++ &DEVICE(qxl)->mem_reentrancy_guard); + 
qxl_reset_state(qxl); + +- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); +- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); ++ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, ++ &DEVICE(qxl)->mem_reentrancy_guard); ++ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, ++ &DEVICE(qxl)->mem_reentrancy_guard); + } + + static void qxl_realize_primary(PCIDevice *dev, Error **errp) +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index 5e15c79b94..66ac9b6cc5 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) + + g->ctrl_vq = virtio_get_queue(vdev, 0); + g->cursor_vq = virtio_get_queue(vdev, 1); +- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); +- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); ++ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, ++ &qdev->mem_reentrancy_guard); ++ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, ++ &qdev->mem_reentrancy_guard); + QTAILQ_INIT(&g->reslist); + QTAILQ_INIT(&g->cmdq); + QTAILQ_INIT(&g->fenceq); +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index 55902e1df7..4e76d6b191 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) + ahci_write_fis_d2h(ad); + + if (ad->port_regs.cmd_issue && !ad->check_bh) { +- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); ++ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, ++ &ad->mem_reentrancy_guard); + qemu_bh_schedule(ad->check_bh); + } + } +diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h +index 303fcd7235..2480455372 100644 +--- a/hw/ide/ahci_internal.h ++++ b/hw/ide/ahci_internal.h +@@ -321,6 +321,7 @@ struct AHCIDevice { + bool init_d2h_sent; + AHCICmdHdr *cur_cmd; + NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; ++ MemReentrancyGuard mem_reentrancy_guard; + }; + + 
struct AHCIPCIState { +diff --git a/hw/ide/core.c b/hw/ide/core.c +index 45d14a25e9..de48ff9f86 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( + BlockCompletionFunc *cb, void *cb_opaque, void *opaque) + { + IDEState *s = opaque; ++ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; + TrimAIOCB *iocb; + + /* Paired with a decrement in ide_trim_bh_cb() */ +@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( + + iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); + iocb->s = s; +- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); ++ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, ++ &DEVICE(dev)->mem_reentrancy_guard); + iocb->ret = 0; + iocb->qiov = qiov; + iocb->i = -1; +diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c +index 632c03779c..082c6980ad 100644 +--- a/hw/misc/imx_rngc.c ++++ b/hw/misc/imx_rngc.c +@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) + sysbus_init_mmio(sbd, &s->iomem); + + sysbus_init_irq(sbd, &s->irq); +- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); +- s->seed_bh = qemu_bh_new(imx_rngc_seed, s); ++ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, ++ &dev->mem_reentrancy_guard); ++ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, ++ &dev->mem_reentrancy_guard); + } + + static void imx_rngc_reset(DeviceState *dev) +diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c +index 43bb1f56ba..80a789f32b 100644 +--- a/hw/misc/macio/mac_dbdma.c ++++ b/hw/misc/macio/mac_dbdma.c +@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) + { + DBDMAState *s = MAC_DBDMA(dev); + +- s->bh = qemu_bh_new(DBDMA_run_bh, s); ++ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); + } + + static void mac_dbdma_class_init(ObjectClass *oc, void *data) +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 53e1c32643..447f669921 100644 +--- a/hw/net/virtio-net.c ++++ 
b/hw/net/virtio-net.c +@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) + n->vqs[index].tx_vq = + virtio_add_queue(vdev, n->net_conf.tx_queue_size, + virtio_net_handle_tx_bh); +- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); ++ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], ++ &DEVICE(vdev)->mem_reentrancy_guard); + } + + n->vqs[index].tx_waiting = 0; +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index ac24eeb5ed..e5a468975e 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, + QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); + } + +- sq->bh = qemu_bh_new(nvme_process_sq, sq); ++ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, ++ &DEVICE(sq->ctrl)->mem_reentrancy_guard); + + if (n->dbbuf_enabled) { + sq->db_addr = n->dbbuf_dbs + (sqid << 3); +@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, + } + } + n->cq[cqid] = cq; +- cq->bh = qemu_bh_new(nvme_post_cqes, cq); ++ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, ++ &DEVICE(cq->ctrl)->mem_reentrancy_guard); + } + + static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) +diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c +index c485da792c..3de288b454 100644 +--- a/hw/scsi/mptsas.c ++++ b/hw/scsi/mptsas.c +@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) + } + s->max_devices = MPTSAS_NUM_PORTS; + +- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); ++ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, ++ &DEVICE(dev)->mem_reentrancy_guard); + + scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); + } +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index c97176110c..3c20b47ad0 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool 
running, RunState state) + AioContext *ctx = blk_get_aio_context(s->conf.blk); + /* The reference is dropped in scsi_dma_restart_bh.*/ + object_ref(OBJECT(s)); +- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); ++ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, ++ &DEVICE(s)->mem_reentrancy_guard); + qemu_bh_schedule(s->bh); + } + } +diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c +index fa76696855..4de34536e9 100644 +--- a/hw/scsi/vmw_pvscsi.c ++++ b/hw/scsi/vmw_pvscsi.c +@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) + pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); + } + +- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); ++ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, ++ &DEVICE(pci_dev)->mem_reentrancy_guard); + + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); + /* override default SCSI bus hotplug-handler, with pvscsi's one */ +diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c +index 88f99c05d5..f013ded91e 100644 +--- a/hw/usb/dev-uas.c ++++ b/hw/usb/dev-uas.c +@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) + + QTAILQ_INIT(&uas->results); + QTAILQ_INIT(&uas->requests); +- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); ++ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, ++ &d->mem_reentrancy_guard); + + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); +diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c +index 8755e9cbb0..a0c4e782b2 100644 +--- a/hw/usb/hcd-dwc2.c ++++ b/hw/usb/hcd-dwc2.c +@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) + s->fi = USB_FRMINTVL - 1; + s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); +- s->async_bh = qemu_bh_new(dwc2_work_bh, s); ++ 
s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, ++ &dev->mem_reentrancy_guard); + + sysbus_init_irq(sbd, &s->irq); + } +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index d4da8dcb8d..c930c60921 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) + } + + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); +- s->async_bh = qemu_bh_new(ehci_work_bh, s); ++ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, ++ &dev->mem_reentrancy_guard); + s->device = dev; + + s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 8ac1175ad2..77baaa7a6b 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); + } + } +- s->bh = qemu_bh_new(uhci_bh, s); ++ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); + s->num_ports_vmstate = NB_PORTS; + QTAILQ_INIT(&s->queues); +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index 176868d345..f500db85ab 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) + static void usb_host_nodev(USBHostDevice *s) + { + if (!s->bh_nodev) { +- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); ++ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, ++ &DEVICE(s)->mem_reentrancy_guard); + } + qemu_bh_schedule(s->bh_nodev); + } +@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) + USBHostDevice *dev = opaque; + + if (!dev->bh_postld) { +- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); ++ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); + } + 
qemu_bh_schedule(dev->bh_postld); + dev->bh_postld_pending = true; +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index fd7df599bc..39fbaaab16 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) + } + } + +- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); +- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); ++ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); ++ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); + dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); + + packet_id_queue_init(&dev->cancelled, dev, "cancelled"); +diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c +index 66cb3f7c24..38ee660a30 100644 +--- a/hw/usb/xen-usb.c ++++ b/hw/usb/xen-usb.c +@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) + + QTAILQ_INIT(&usbif->req_free_q); + QSIMPLEQ_INIT(&usbif->hotplug_q); +- usbif->bh = qemu_bh_new(usbback_bh, usbif); ++ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, ++ &DEVICE(xendev)->mem_reentrancy_guard); + } + + static int usbback_free(struct XenLegacyDevice *xendev) +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 43092aa634..5186e831dd 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) + precopy_add_notifier(&s->free_page_hint_notify); + + object_ref(OBJECT(s->iothread)); +- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), +- virtio_ballloon_get_free_page_hints, s); ++ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), ++ virtio_ballloon_get_free_page_hints, s, ++ &dev->mem_reentrancy_guard); + } + + if (virtio_has_feature(s->host_features, 
VIRTIO_BALLOON_F_REPORTING)) { +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 802e1b9659..2fe804510f 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) + vcrypto->vqs[i].dataq = + virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); + vcrypto->vqs[i].dataq_bh = +- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); ++ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], ++ &dev->mem_reentrancy_guard); + vcrypto->vqs[i].vcrypto = vcrypto; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch b/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch new file mode 100644 index 0000000..efa966e --- /dev/null +++ b/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch @@ -0,0 +1,141 @@ +From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 29 May 2023 14:21:08 -0400 +Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI + controller (CVE-2023-0330) + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit b987718bbb1d0eabf95499b976212dd5f0120d75 +Author: Thomas Huth +Date: Mon May 22 11:10:11 2023 +0200 + + hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330) + + We cannot use the generic reentrancy guard in the LSI code, so + we have to manually prevent endless reentrancy here. 
The problematic + lsi_execute_script() function has already a way to detect whether + too many instructions have been executed - we just have to slightly + change the logic here that it also takes into account if the function + has been called too often in a reentrant way. + + The code in fuzz-lsi53c895a-test.c has been taken from an earlier + patch by Mauro Matteo Cascella. + + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563 + Message-Id: <20230522091011.1082574-1-thuth@redhat.com> + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Alexander Bulekov + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 23 +++++++++++++++------ + tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++ + 2 files changed, 50 insertions(+), 6 deletions(-) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index 048436352b..f7d45b0b20 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s) + uint32_t addr, addr_high; + int opcode; + int insn_processed = 0; ++ static int reentrancy_level; ++ ++ reentrancy_level++; + + s->istat1 |= LSI_ISTAT1_SRUN; + again: +- if (++insn_processed > LSI_MAX_INSN) { +- /* Some windows drivers make the device spin waiting for a memory +- location to change. If we have been executed a lot of code then +- assume this is the case and force an unexpected device disconnect. +- This is apparently sufficient to beat the drivers into submission. +- */ ++ /* ++ * Some windows drivers make the device spin waiting for a memory location ++ * to change. If we have executed more than LSI_MAX_INSN instructions then ++ * assume this is the case and force an unexpected device disconnect. This ++ * is apparently sufficient to beat the drivers into submission. ++ * ++ * Another issue (CVE-2023-0330) can occur if the script is programmed to ++ * trigger itself again and again. 
Avoid this problem by stopping after ++ * being called multiple times in a reentrant way (8 is an arbitrary value ++ * which should be enough for all valid use cases). ++ */ ++ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) { + if (!(s->sien0 & LSI_SIST0_UDC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "lsi_scsi: inf. loop with UDC masked"); +@@ -1596,6 +1605,8 @@ again: + } + } + trace_lsi_execute_script_stop(); ++ ++ reentrancy_level--; + } + + static uint8_t lsi_reg_readb(LSIState *s, int offset) +diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c +index 2012bd54b7..1b55928b9f 100644 +--- a/tests/qtest/fuzz-lsi53c895a-test.c ++++ b/tests/qtest/fuzz-lsi53c895a-test.c +@@ -8,6 +8,36 @@ + #include "qemu/osdep.h" + #include "libqtest.h" + ++/* ++ * This used to trigger a DMA reentrancy issue ++ * leading to memory corruption bugs like stack ++ * overflow or use-after-free ++ * https://gitlab.com/qemu-project/qemu/-/issues/1563 ++ */ ++static void test_lsi_dma_reentrancy(void) ++{ ++ QTestState *s; ++ ++ s = qtest_init("-M q35 -m 512M -nodefaults " ++ "-blockdev driver=null-co,node-name=null0 " ++ "-device lsi53c810 -device scsi-cd,drive=null0"); ++ ++ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */ ++ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */ ++ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */ ++ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/ ++ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */ ++ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/ ++ qtest_writel(s, 0xff000000, 0xc0000024); ++ qtest_writel(s, 0xff000114, 0x00000080); ++ qtest_writel(s, 0xff00012c, 0xff000000); ++ qtest_writel(s, 0xff000004, 0xff000114); ++ qtest_writel(s, 0xff000008, 0xff100014); ++ qtest_writel(s, 0xff10002f, 0x000000ff); ++ ++ qtest_quit(s); ++} ++ + /* + * This used to trigger a UAF in lsi_do_msgout() + * https://gitlab.com/qemu-project/qemu/-/issues/972 +@@ -124,5 +154,8 @@ int main(int argc, char 
**argv) + qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req", + test_lsi_do_msgout_cancel_req); + ++ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy", ++ test_lsi_dma_reentrancy); ++ + return g_test_run(); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch b/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch deleted file mode 100644 index b452281..0000000 --- a/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Thu, 23 Feb 2023 13:57:47 +0100 -Subject: [PATCH] hw/smbios: fix field corruption in type 4 table - -RH-Author: Julia Suvorova -RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table -RH-Bugzilla: 2169904 -RH-Acked-by: Igor Mammedov -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec - -Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the -strings which follow immediately after the struct fields have been -overwritten by unconditional filling of later fields such as core_count2. -Make these fields dependent on the SMBIOS version. - -Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4") -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904 - -Signed-off-by: Julia Suvorova -Message-Id: <20230223125747.254914-1-jusual@redhat.com> -Reviewed-by: Igor Mammedov -Reviewed-by: Ani Sinha -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b) ---- - hw/smbios/smbios.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index c5ad69237e..2d2ece3edb 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) - t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; - t->core_enabled = t->core_count; - -- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); -- - t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads; -- t->thread_count2 = cpu_to_le16(ms->smp.threads); - - t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ - t->processor_family2 = cpu_to_le16(0x01); /* Other */ - -+ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { -+ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); -+ t->thread_count2 = cpu_to_le16(ms->smp.threads); -+ } -+ - SMBIOS_BUILD_TABLE_POST; - smbios_type4_count++; - } --- -2.31.1 - diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch new file mode 100644 index 0000000..ffabd75 --- /dev/null +++ b/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch @@ -0,0 +1,76 @@ +From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0ddcb39c9357 
+Author: Alex Williamson +Date: Fri Jun 30 16:36:08 2023 -0600 + + hw/vfio/pci-quirks: Sanitize capability pointer + + Coverity reports a tained scalar when traversing the capabilities + chain (CID 1516589). In practice I've never seen a device with a + chain so broken as to cause an issue, but it's also pretty easy to + sanitize. + + Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques") + Signed-off-by: Alex Williamson + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci-quirks.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 0ed2fcd531..f4ff836805 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { + .set = set_nv_gpudirect_clique_id, + }; + ++static bool is_valid_std_cap_offset(uint8_t pos) ++{ ++ return (pos >= PCI_STD_HEADER_SIZEOF && ++ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF)); ++} ++ + static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + { + PCIDevice *pdev = &vdev->pdev; +@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + */ + ret = pread(vdev->vbasedev.fd, &tmp, 1, + vdev->config_offset + PCI_CAPABILITY_LIST); +- if (ret != 1 || !tmp) { ++ if (ret != 1 || !is_valid_std_cap_offset(tmp)) { + error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); + return -EINVAL; + } +@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + d4_conflict = true; + } + tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; +- } while (tmp); ++ } while (is_valid_std_cap_offset(tmp)); + + if (!c8_conflict) { + pos = 0xC8; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch 
new file mode 100644 index 0000000..99f5c75 --- /dev/null +++ b/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch @@ -0,0 +1,110 @@ +From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for + GPUDirect Cliques +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit f6b30c1984f7 +Author: Alex Williamson +Date: Thu Jun 8 12:05:07 2023 -0600 + + hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques + + NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset + previously reserved for use by hypervisors to implement the GPUDirect + Cliques capability. A revised specification provides an alternate + location. Add a config space walk to the quirk to check for conflicts, + allowing us to fall back to the new location or generate an error at the + quirk setup rather than when the real conflicting capability is added + should there be no available location. 
+ + Signed-off-by: Alex Williamson + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 40 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index f0147a050a..0ed2fcd531 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) + * +---------------------------------+---------------------------------+ + * + * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf ++ * ++ * Specification for Turning and later GPU architectures: ++ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf + */ + static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v, + const char *name, void *opaque, +@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { + static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + { + PCIDevice *pdev = &vdev->pdev; +- int ret, pos = 0xC8; ++ int ret, pos; ++ bool c8_conflict = false, d4_conflict = false; ++ uint8_t tmp; + + if (vdev->nv_gpudirect_clique == 0xFF) { + return 0; +@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + return -EINVAL; + } + ++ /* ++ * Per the updated specification above, it's recommended to use offset ++ * D4h for Turing and later GPU architectures due to a conflict of the ++ * MSI-X capability at C8h. We don't know how to determine the GPU ++ * architecture, instead we walk the capability chain to mark conflicts ++ * and choose one or error based on the result. ++ * ++ * NB. Cap list head in pdev->config is already cleared, read from device. 
++ */ ++ ret = pread(vdev->vbasedev.fd, &tmp, 1, ++ vdev->config_offset + PCI_CAPABILITY_LIST); ++ if (ret != 1 || !tmp) { ++ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); ++ return -EINVAL; ++ } ++ ++ do { ++ if (tmp == 0xC8) { ++ c8_conflict = true; ++ } else if (tmp == 0xD4) { ++ d4_conflict = true; ++ } ++ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; ++ } while (tmp); ++ ++ if (!c8_conflict) { ++ pos = 0xC8; ++ } else if (!d4_conflict) { ++ pos = 0xD4; ++ } else { ++ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space"); ++ return -EINVAL; ++ } ++ + ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp); + if (ret < 0) { + error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: "); +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch b/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch new file mode 100644 index 0000000..7a5963c --- /dev/null +++ b/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch @@ -0,0 +1,62 @@ +From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 17 Jul 2023 18:21:26 +0200 +Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in + virtio_iommu_handle_command() + +RH-Author: Eric Auger +RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes +RH-Bugzilla: 2229133 +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu +RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 + +In the virtio_iommu_handle_command() when a PROBE request is handled, +output_size takes a value greater than the tail size and on a subsequent +iteration we can get a stack out-of-band access. Initialize the +output_size on each iteration. + +The issue was found with ASAN. 
Credits to: +Yiming Tao(Zhejiang University) +Gaoning Pan(Zhejiang University) + +Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request") +Signed-off-by: Eric Auger +Reported-by: Mauro Matteo Cascella +Cc: qemu-stable@nongnu.org + +Message-Id: <20230717162126.11693-1-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 421e2a944f..17ce630200 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) + VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); + struct virtio_iommu_req_head head; + struct virtio_iommu_req_tail tail = {}; +- size_t output_size = sizeof(tail), sz; + VirtQueueElement *elem; + unsigned int iov_cnt; + struct iovec *iov; + void *buf = NULL; ++ size_t sz; + + for (;;) { ++ size_t output_size = sizeof(tail); ++ + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + return; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch b/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch new file mode 100644 index 0000000..3ee6b29 --- /dev/null +++ b/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch @@ -0,0 +1,52 @@ +From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Thu, 3 Aug 2023 14:29:15 -0400 +Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID + 0x8000001F is set + +RH-Author: Bandan Das +RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter +RH-Bugzilla: 2214839 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm) + 
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 + +commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8 +Author: Tom Lendacky +Date: Fri Sep 30 10:14:30 2022 -0500 + + i386/cpu: Update how the EBX register of CPUID 0x8000001F is set + + Update the setting of CPUID 0x8000001F EBX to clearly document the ranges + associated with fields being set. + + Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active") + Signed-off-by: Tom Lendacky + Reviewed-by: Dr. David Alan Gilbert + Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 839706b430..4ac3046313 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 
0x8 : 0; +- *ebx = sev_get_cbit_position(); +- *ebx |= sev_get_reduced_phys_bits() << 6; ++ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ ++ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; + default: +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch b/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch new file mode 100644 index 0000000..e9d28d3 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch @@ -0,0 +1,77 @@ +From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Thu, 3 Aug 2023 14:22:55 -0400 +Subject: [PATCH 12/14] i386/sev: Update checks and information related to + reduced-phys-bits + +RH-Author: Bandan Das +RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter +RH-Bugzilla: 2214839 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 + +commit 8168fed9f84e3128f7628969ae78af49433d5ce7 +Author: Tom Lendacky +Date: Fri Sep 30 10:14:29 2022 -0500 + + i386/sev: Update checks and information related to reduced-phys-bits + + The value of the reduced-phys-bits parameter is propogated to the CPUID + information exposed to the guest. Update the current validation check to + account for the size of the CPUID field (6-bits), ensuring the value is + in the range of 1 to 63. + + Maintain backward compatibility, to an extent, by allowing a value greater + than 1 (so that the previously documented value of 5 still works), but not + allowing anything over 63. + + Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context") + Signed-off-by: Tom Lendacky + Reviewed-by: Dr. 
David Alan Gilbert + Message-Id: + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/sev.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 859e06f6ad..fe2144c038 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); + host_cbitpos = ebx & 0x3f; + ++ /* ++ * The cbitpos value will be placed in bit positions 5:0 of the EBX ++ * register of CPUID 0x8000001F. No need to verify the range as the ++ * comparison against the host value accomplishes that. ++ */ + if (host_cbitpos != sev->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", + __func__, host_cbitpos, sev->cbitpos); + goto err; + } + +- if (sev->reduced_phys_bits < 1) { +- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1," +- " requested '%d'", __func__, sev->reduced_phys_bits); ++ /* ++ * The reduced-phys-bits value will be placed in bit positions 11:6 of ++ * the EBX register of CPUID 0x8000001F, so verify the supplied value ++ * is in the range of 1 to 63. 
++ */ ++ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { ++ error_setg(errp, "%s: reduced_phys_bits check failed," ++ " it should be in the range of 1 to 63, requested '%d'", ++ __func__, sev->reduced_phys_bits); + goto err; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch b/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch deleted file mode 100644 index 0f321e4..0000000 --- a/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch +++ /dev/null @@ -1,64 +0,0 @@ -From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Thu, 23 Feb 2023 14:59:21 +0800 -Subject: [PATCH 8/8] intel-iommu: fail DEVIOTLB_UNMAP without dt mode - -RH-Author: Laurent Vivier -RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode -RH-Bugzilla: 2156876 -RH-Acked-by: Eric Auger -RH-Acked-by: Peter Xu -RH-Acked-by: MST -RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos) - -Without dt mode, device IOTLB notifier won't work since guest won't -send device IOTLB invalidation descriptor in this case. Let's fail -early instead of misbehaving silently. - -Reviewed-by: Laurent Vivier -Tested-by: Laurent Vivier -Tested-by: Viktor Prutyanov -Buglink: https://bugzilla.redhat.com/2156876 -Signed-off-by: Jason Wang -Message-Id: <20230223065924.42503-3-jasowang@redhat.com> -Reviewed-by: Peter Xu -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3) - -Conflict in hw/i386/intel_iommu.c because of missing commit: - - 4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode") ---- - hw/i386/intel_iommu.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index a08ee85edf..d2983f40d3 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, - { - VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); - IntelIOMMUState *s = vtd_as->iommu_state; -+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - - /* TODO: add support for VFIO and vhost users */ - if (s->snoop_control) { -@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, - "Snoop Control with vhost or VFIO is not supported"); - return -ENOTSUP; - } -+ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) { -+ error_setg_errno(errp, ENOTSUP, -+ "device %02x.%02x.%x requires device IOTLB mode", -+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), -+ PCI_FUNC(vtd_as->devfn)); -+ return -ENOTSUP; -+ } - - /* Update per-address-space notifier flags */ - vtd_as->notifier_flags = new; --- -2.39.1 - diff --git a/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch b/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch deleted file mode 100644 index 22abf35..0000000 --- a/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch +++ /dev/null @@ -1,386 +0,0 @@ -From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001 -From: "manish.mishra" -Date: Tue, 20 Dec 2022 18:44:17 +0000 -Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 150: migration: Fix multifd crash on 
channel disorders -RH-Bugzilla: 2169732 -RH-Acked-by: quintela1 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Dr. David Alan Gilbert -RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm) - -MSG_PEEK peeks at the channel, The data is treated as unread and -the next read shall still return this data. This support is -currently added only for socket class. Extra parameter 'flags' -is added to io_readv calls to pass extra read flags like MSG_PEEK. - -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrange -Reviewed-by: Juan Quintela -Suggested-by: Daniel P. Berrange -Signed-off-by: manish.mishra -Signed-off-by: Juan Quintela -(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3) -Signed-off-by: Peter Xu ---- - chardev/char-socket.c | 4 ++-- - include/io/channel.h | 6 ++++++ - io/channel-buffer.c | 1 + - io/channel-command.c | 1 + - io/channel-file.c | 1 + - io/channel-null.c | 1 + - io/channel-socket.c | 19 ++++++++++++++++++- - io/channel-tls.c | 1 + - io/channel-websock.c | 1 + - io/channel.c | 16 ++++++++++++---- - migration/channel-block.c | 1 + - migration/rdma.c | 1 + - scsi/qemu-pr-helper.c | 2 +- - tests/qtest/tpm-emu.c | 2 +- - tests/unit/test-io-channel-socket.c | 1 + - util/vhost-user-server.c | 2 +- - 16 files changed, 50 insertions(+), 10 deletions(-) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 879564aa8a..5afce9a464 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) - if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { - ret = qio_channel_readv_full(s->ioc, &iov, 1, - &msgfds, &msgfds_num, -- NULL); -+ 0, NULL); - } else { - ret = qio_channel_readv_full(s->ioc, &iov, 1, - NULL, NULL, -- NULL); -+ 0, NULL); - } - - if (msgfds_num) { -diff --git a/include/io/channel.h b/include/io/channel.h -index c680ee7480..716235d496 100644 ---- a/include/io/channel.h -+++ b/include/io/channel.h 
-@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, - - #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 - -+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 -+ - typedef enum QIOChannelFeature QIOChannelFeature; - - enum QIOChannelFeature { -@@ -41,6 +43,7 @@ enum QIOChannelFeature { - QIO_CHANNEL_FEATURE_SHUTDOWN, - QIO_CHANNEL_FEATURE_LISTEN, - QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK, - }; - - -@@ -114,6 +117,7 @@ struct QIOChannelClass { - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp); - int (*io_close)(QIOChannel *ioc, - Error **errp); -@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc, - * @niov: the length of the @iov array - * @fds: pointer to an array that will received file handles - * @nfds: pointer filled with number of elements in @fds on return -+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) - * @errp: pointer to a NULL-initialized error object - * - * Read data from the IO channel, storing it in the -@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp); - - -diff --git a/io/channel-buffer.c b/io/channel-buffer.c -index bf52011be2..8096180f85 100644 ---- a/io/channel-buffer.c -+++ b/io/channel-buffer.c -@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); -diff --git a/io/channel-command.c b/io/channel-command.c -index 74516252ba..e7edd091af 100644 ---- a/io/channel-command.c -+++ b/io/channel-command.c -@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); -diff --git a/io/channel-file.c b/io/channel-file.c -index b67687c2aa..d76663e6ae 100644 ---- a/io/channel-file.c -+++ 
b/io/channel-file.c -@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); -diff --git a/io/channel-null.c b/io/channel-null.c -index 75e3781507..4fafdb770d 100644 ---- a/io/channel-null.c -+++ b/io/channel-null.c -@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc, - size_t niov, - int **fds G_GNUC_UNUSED, - size_t *nfds G_GNUC_UNUSED, -+ int flags, - Error **errp) - { - QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); -diff --git a/io/channel-socket.c b/io/channel-socket.c -index b76dca9cc1..7aca84f61a 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, - } - #endif - -+ qio_channel_set_feature(QIO_CHANNEL(ioc), -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); -+ - return 0; - } - -@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, - } - #endif /* WIN32 */ - -+ qio_channel_set_feature(QIO_CHANNEL(cioc), -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); -+ - trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); - return cioc; - -@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - - } - -+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { -+ sflags |= MSG_PEEK; -+ } -+ - retry: - ret = recvmsg(sioc->fd, &msg, sflags); - if (ret < 0) { -@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - ssize_t done = 0; - ssize_t i; -+ int sflags = 0; -+ -+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { -+ sflags |= MSG_PEEK; -+ } - - for (i = 0; i < niov; 
i++) { - ssize_t ret; -@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - ret = recv(sioc->fd, - iov[i].iov_base, - iov[i].iov_len, -- 0); -+ sflags); - if (ret < 0) { - if (errno == EAGAIN) { - if (done) { -diff --git a/io/channel-tls.c b/io/channel-tls.c -index 4ce890a538..c730cb8ec5 100644 ---- a/io/channel-tls.c -+++ b/io/channel-tls.c -@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); -diff --git a/io/channel-websock.c b/io/channel-websock.c -index fb4932ade7..a12acc27cf 100644 ---- a/io/channel-websock.c -+++ b/io/channel-websock.c -@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); -diff --git a/io/channel.c b/io/channel.c -index 0640941ac5..a8c7f11649 100644 ---- a/io/channel.c -+++ b/io/channel.c -@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); -@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - return -1; - } - -- return klass->io_readv(ioc, iov, niov, fds, nfds, errp); -+ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && -+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { -+ error_setg_errno(errp, EINVAL, -+ "Channel does not support peek read"); -+ return -1; -+ } -+ -+ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); - } - - -@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, - while ((nlocal_iov > 0) || local_fds) { - ssize_t len; - len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, -- local_nfds, errp); -+ local_nfds, 0, errp); - if (len == QIO_CHANNEL_ERR_BLOCK) { - if 
(qemu_in_coroutine()) { - qio_channel_yield(ioc, G_IO_IN); -@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc, - size_t niov, - Error **errp) - { -- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); -+ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); - } - - -@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, - Error **errp) - { - struct iovec iov = { .iov_base = buf, .iov_len = buflen }; -- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); -+ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); - } - - -diff --git a/migration/channel-block.c b/migration/channel-block.c -index f4ab53acdb..b7374363c3 100644 ---- a/migration/channel-block.c -+++ b/migration/channel-block.c -@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); -diff --git a/migration/rdma.c b/migration/rdma.c -index 94a55dd95b..d8b4632094 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); -diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c -index 196b78c00d..199227a556 100644 ---- a/scsi/qemu-pr-helper.c -+++ b/scsi/qemu-pr-helper.c -@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, - iov.iov_base = buf; - iov.iov_len = sz; - n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, -- &fds, &nfds, errp); -+ &fds, &nfds, 0, errp); - - if (n_read == QIO_CHANNEL_ERR_BLOCK) { - qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); -diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c -index 2994d1cf42..3cf1acaf7d 100644 ---- a/tests/qtest/tpm-emu.c -+++ b/tests/qtest/tpm-emu.c -@@ -106,7 +106,7 @@ void 
*tpm_emu_ctrl_thread(void *data) - int *pfd = NULL; - size_t nfd = 0; - -- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); -+ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); - cmd = be32_to_cpu(cmd); - g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); - g_assert_cmpint(nfd, ==, 1); -diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c -index b36a5d972a..b964bb202d 100644 ---- a/tests/unit/test-io-channel-socket.c -+++ b/tests/unit/test-io-channel-socket.c -@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void) - G_N_ELEMENTS(iorecv), - &fdrecv, - &nfdrecv, -+ 0, - &error_abort); - - g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); -diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c -index 232984ace6..145eb17c08 100644 ---- a/util/vhost-user-server.c -+++ b/util/vhost-user-server.c -@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) - * qio_channel_readv_full may have short reads, keeping calling it - * until getting VHOST_USER_HDR_SIZE or 0 bytes in total - */ -- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); -+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); - if (rc < 0) { - if (rc == QIO_CHANNEL_ERR_BLOCK) { - assert(local_err == NULL); --- -2.31.1 - diff --git a/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch b/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch index 95ae201..8d6795e 100644 --- a/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch +++ b/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch @@ -1,20 +1,19 @@ -From c13b4e32be9de900e7a55ebf5c341df8363e3b4a Mon Sep 17 00:00:00 2001 +From 0306736e3afbe7be99d01e4d70d1a5f2e38c32c2 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Tue, 15 Aug 2023 00:08:55 +0000 -Subject: [PATCH 4/4] io: remove io watch if TLS channel is closed during - handshake +Subject: [PATCH] io: 
remove io watch if TLS channel is closed during handshake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 301: io: remove io watch if TLS channel is closed during handshake -RH-Bugzilla: 2216503 +RH-MergeRequest: 315: io: remove io watch if TLS channel is closed during handshake +RH-Bugzilla: 2216504 RH-Acked-by: Peter Xu -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/1] 10bc3055a369a89996a1be34ce8d6c1fbc2c531e (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2) +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 5f23602074b2edde0d445d529f07434bd156202d (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216503 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216504 CVE: CVE-2023-3354 Upstream: Merged @@ -56,10 +55,10 @@ index 5672479e9e..26c67f17e2 100644 /** diff --git a/io/channel-tls.c b/io/channel-tls.c -index c730cb8ec5..bd79e78837 100644 +index 9805dd0a3f..847d5297c3 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c -@@ -195,12 +195,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc, +@@ -198,12 +198,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc, } trace_qio_channel_tls_handshake_pending(ioc, status); @@ -79,7 +78,7 @@ index c730cb8ec5..bd79e78837 100644 } } -@@ -215,6 +216,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc, +@@ -218,6 +219,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc, QIOChannelTLS *tioc = QIO_CHANNEL_TLS( qio_task_get_source(task)); @@ -87,7 +86,7 @@ index c730cb8ec5..bd79e78837 100644 g_free(data); qio_channel_tls_handshake_task(tioc, task, context); -@@ -375,6 +377,10 @@ static int qio_channel_tls_close(QIOChannel *ioc, +@@ -378,6 +380,10 @@ static int qio_channel_tls_close(QIOChannel *ioc, { QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); diff --git a/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch 
b/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch deleted file mode 100644 index 399acfc..0000000 --- a/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 6727e92a97f8ee9f367a41111bef3f5cad4a479a Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:02 +0200 -Subject: [PATCH 15/20] iotests/106, 214, 308: Read only one size line - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [10/12] 1554e0a92b92ed101a251478ccae43f45f6e071e (hreitz/qemu-kvm-c-9-s) - -These tests read size information (sometimes disk size, sometimes -virtual size) from qemu-img info's output. Once qemu-img starts -printing info about child nodes, we are going to see multiple instances -of that per image, but these tests are only interested in the first one, -so use "head -n 1" to get it. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-11-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 74163adda3101b127943f7cbbf8fcccd2d472426) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/106 | 4 ++-- - tests/qemu-iotests/214 | 6 ++++-- - tests/qemu-iotests/308 | 4 ++-- - 3 files changed, 8 insertions(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 -index 9d6adb542d..ae0fc46691 100755 ---- a/tests/qemu-iotests/106 -+++ b/tests/qemu-iotests/106 -@@ -66,7 +66,7 @@ for create_mode in off falloc full; do - expected_size=$((expected_size + $GROWTH_SIZE)) - fi - -- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') -+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) - actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') - - # The actual size may exceed the expected size, depending on the file -@@ -105,7 +105,7 @@ for growth_mode in falloc full; do - _make_test_img -o "extent_size_hint=0" 2G - $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K - -- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') -+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) - actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') - - if [ $actual_size -lt $GROWTH_SIZE ]; then -diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214 -index c66e246ba2..55ffcd7f44 100755 ---- a/tests/qemu-iotests/214 -+++ b/tests/qemu-iotests/214 -@@ -102,7 +102,8 @@ let data_size="8 * $cluster_size" - $QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \ - 2>&1 | _filter_qemu_io | _filter_testdir - sizeA=$($QEMU_IMG info --output=json "$TEST_IMG" | -- sed -n '/"actual-size":/ s/[^0-9]//gp') -+ sed -n '/"actual-size":/ s/[^0-9]//gp' | -+ head -n 1) - - _make_test_img 2M -o cluster_size=$cluster_size - 
echo "Write compressed data:" -@@ -124,7 +125,8 @@ $QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\ - _filter_qemu_io | _filter_testdir - - sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" | -- sed -n '/"actual-size":/ s/[^0-9]//gp') -+ sed -n '/"actual-size":/ s/[^0-9]//gp' | -+ head -n 1) - - if [ $sizeA -lt $sizeB ] - then -diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 -index bde4aac2fa..09275e9a10 100755 ---- a/tests/qemu-iotests/308 -+++ b/tests/qemu-iotests/308 -@@ -217,12 +217,12 @@ echo - echo '=== Remove export ===' - - # Double-check that $EXT_MP appears as a non-empty file (the raw image) --$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' -+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 - - fuse_export_del 'export-mp' - - # See that the file appears empty again --$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' -+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 - - echo - echo '=== Writable export ===' --- -2.31.1 - diff --git a/SOURCES/kvm-iotests-Filter-child-node-information.patch b/SOURCES/kvm-iotests-Filter-child-node-information.patch deleted file mode 100644 index 12eee3a..0000000 --- a/SOURCES/kvm-iotests-Filter-child-node-information.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 3102e62f80757729c97e58e2b3d62a6a9de952a7 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:01 +0200 -Subject: [PATCH 14/20] iotests: Filter child node information - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [9/12] 0b0a42d54397791f7f149e53c9175b7863707e70 (hreitz/qemu-kvm-c-9-s) - -Before we let qemu-img info print child node information, have -common.filter, common.rc, and iotests.py filter it from the test output -so we get as few reference output changes as possible. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-10-hreitz@redhat.com> -Tested-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit bcc6777ad6facede73c0cf8b1700045bf4365f7d) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/common.filter | 22 ++++++++++++++-------- - tests/qemu-iotests/common.rc | 22 ++++++++++++++-------- - tests/qemu-iotests/iotests.py | 18 +++++++++++++++--- - 3 files changed, 43 insertions(+), 19 deletions(-) - -diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index 6a13757177..6ddda2ee64 100644 ---- a/tests/qemu-iotests/common.filter -+++ b/tests/qemu-iotests/common.filter -@@ -224,6 +224,7 @@ _filter_img_info() - - discard=0 - regex_json_spec_start='^ *"format-specific": \{' -+ regex_json_child_start='^ *"children": \[' - gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ - -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ - -e "s#$TEST_DIR#TEST_DIR#g" \ -@@ -252,20 +253,25 @@ _filter_img_info() - -e 's/\(compression type: \)\(zlib\|zstd\)/\1COMPRESSION_TYPE/' \ - -e "s/uuid: [-a-f0-9]\\+/uuid: 00000000-0000-0000-0000-000000000000/" | \ - while IFS='' read -r line; do -- if [[ $format_specific == 1 ]]; then -- discard=0 -- elif [[ $line == "Format specific information:" ]]; then -- discard=1 -- elif [[ $line =~ $regex_json_spec_start ]]; then -- discard=2 -- regex_json_spec_end="^${line%%[^ ]*}\\},? *$" -+ if [[ $discard == 0 ]]; then -+ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then -+ discard=1 -+ elif [[ $line =~ "Child node '/" ]]; then -+ discard=1 -+ elif [[ $line =~ $regex_json_spec_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\},? *$" -+ elif [[ $line =~ $regex_json_child_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\],? *$" -+ fi - fi - if [[ $discard == 0 ]]; then - echo "$line" - elif [[ $discard == 1 && ! 
$line ]]; then - echo - discard=0 -- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then -+ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then - discard=0 - fi - done -diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc -index db757025cb..f4476b62f7 100644 ---- a/tests/qemu-iotests/common.rc -+++ b/tests/qemu-iotests/common.rc -@@ -711,6 +711,7 @@ _img_info() - - discard=0 - regex_json_spec_start='^ *"format-specific": \{' -+ regex_json_child_start='^ *"children": \[' - $QEMU_IMG info $QEMU_IMG_EXTRA_ARGS "$@" "$TEST_IMG" 2>&1 | \ - sed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ - -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -@@ -721,20 +722,25 @@ _img_info() - -e "/^disk size:/ D" \ - -e "/actual-size/ D" | \ - while IFS='' read -r line; do -- if [[ $format_specific == 1 ]]; then -- discard=0 -- elif [[ $line == "Format specific information:" ]]; then -- discard=1 -- elif [[ $line =~ $regex_json_spec_start ]]; then -- discard=2 -- regex_json_spec_end="^${line%%[^ ]*}\\},? *$" -+ if [[ $discard == 0 ]]; then -+ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then -+ discard=1 -+ elif [[ $line =~ "Child node '/" ]]; then -+ discard=1 -+ elif [[ $format_specific == 0 && $line =~ $regex_json_spec_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\},? *$" -+ elif [[ $line =~ $regex_json_child_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\],? *$" -+ fi - fi - if [[ $discard == 0 ]]; then - echo "$line" - elif [[ $discard == 1 && ! 
$line ]]; then - echo - discard=0 -- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then -+ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then - discard=0 - fi - done -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index da7d6637e1..94aeb3f3b2 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -329,7 +329,7 @@ def qemu_img_log(*args: str, check: bool = True - - def img_info_log(filename: str, filter_path: Optional[str] = None, - use_image_opts: bool = False, extra_args: Sequence[str] = (), -- check: bool = True, -+ check: bool = True, drop_child_info: bool = True, - ) -> None: - args = ['info'] - if use_image_opts: -@@ -342,7 +342,7 @@ def img_info_log(filename: str, filter_path: Optional[str] = None, - output = qemu_img(*args, check=check).stdout - if not filter_path: - filter_path = filename -- log(filter_img_info(output, filter_path)) -+ log(filter_img_info(output, filter_path, drop_child_info)) - - def qemu_io_wrap_args(args: Sequence[str]) -> List[str]: - if '-f' in args or '--image-opts' in args: -@@ -642,11 +642,23 @@ def _filter(_key, value): - def filter_generated_node_ids(msg): - return re.sub("#block[0-9]+", "NODE_NAME", msg) - --def filter_img_info(output, filename): -+def filter_img_info(output: str, filename: str, -+ drop_child_info: bool = True) -> str: - lines = [] -+ drop_indented = False - for line in output.split('\n'): - if 'disk size' in line or 'actual-size' in line: - continue -+ -+ # Drop child node info -+ if drop_indented: -+ if line.startswith(' '): -+ continue -+ drop_indented = False -+ if drop_child_info and "Child node '/" in line: -+ drop_indented = True -+ continue -+ - line = line.replace(filename, 'TEST_IMG') - line = filter_testfiles(line) - line = line.replace(imgfmt, 'IMGFMT') --- -2.31.1 - diff --git a/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch b/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch new 
file mode 100644 index 0000000..1fc5697 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch @@ -0,0 +1,144 @@ +From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:34 +0200 +Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm) + +This tests exercises graph locking, draining, and graph modifications +with AioContext switches a lot. Amongst others, it serves as a +regression test for bdrv_graph_wrlock() deadlocking because it is called +with a locked AioContext and for AioContext handling in the NBD server. + +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-4-kwolf@redhat.com> +Tested-by: Eric Blake +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/iotests.py | 4 ++ + .../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++-- + .../tests/graph-changes-while-io.out | 4 +- + 3 files changed, 58 insertions(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 3e82c634cf..7073579a7d 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ + assert self._qmp is not None + return self._qmp.cmd(cmd, args) + ++ def get_qmp(self) -> QEMUMonitorProtocol: ++ assert self._qmp is not None ++ return self._qmp ++ + def stop(self, kill_signal=15): + self._p.send_signal(kill_signal) + self._p.wait() +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io 
b/tests/qemu-iotests/tests/graph-changes-while-io +index 7664f33689..750e7d4d38 100755 +--- a/tests/qemu-iotests/tests/graph-changes-while-io ++++ b/tests/qemu-iotests/tests/graph-changes-while-io +@@ -22,19 +22,19 @@ + import os + from threading import Thread + import iotests +-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ +- QemuStorageDaemon ++from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ ++ QMPTestCase, QemuStorageDaemon + + + top = os.path.join(iotests.test_dir, 'top.img') + nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') + + +-def do_qemu_img_bench() -> None: ++def do_qemu_img_bench(count: int = 2000000) -> None: + """ + Do some I/O requests on `nbd_sock`. + """ +- qemu_img('bench', '-f', 'raw', '-c', '2000000', ++ qemu_img('bench', '-f', 'raw', '-c', str(count), + f'nbd+unix:///node0?socket={nbd_sock}') + + +@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): + + bench_thr.join() + ++ def test_commit_while_io(self) -> None: ++ # Run qemu-img bench in the background ++ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) ++ bench_thr.start() ++ ++ qemu_io('-c', 'write 0 64k', top) ++ qemu_io('-c', 'write 128k 64k', top) ++ ++ result = self.qsd.qmp('blockdev-add', { ++ 'driver': imgfmt, ++ 'node-name': 'overlay', ++ 'backing': None, ++ 'file': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('blockdev-snapshot', { ++ 'node': 'node0', ++ 'overlay': 'overlay', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ # While qemu-img bench is running, repeatedly commit overlay to node0 ++ while bench_thr.is_alive(): ++ result = self.qsd.qmp('block-commit', { ++ 'job-id': 'job0', ++ 'device': 'overlay', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('block-job-cancel', { ++ 'device': 'job0', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ cancelled = False ++ while not cancelled: ++ for event in 
self.qsd.get_qmp().get_events(wait=10.0): ++ if event['event'] != 'JOB_STATUS_CHANGE': ++ continue ++ if event['data']['status'] == 'null': ++ cancelled = True ++ ++ bench_thr.join() ++ + if __name__ == '__main__': + # Format must support raw backing files + iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out +index ae1213e6f8..fbc63e62f8 100644 +--- a/tests/qemu-iotests/tests/graph-changes-while-io.out ++++ b/tests/qemu-iotests/tests/graph-changes-while-io.out +@@ -1,5 +1,5 @@ +-. ++.. + ---------------------------------------------------------------------- +-Ran 1 tests ++Ran 2 tests + + OK +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch b/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch new file mode 100644 index 0000000..4e91505 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch @@ -0,0 +1,132 @@ +From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 9 May 2023 15:41:33 +0200 +Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm) + +This tests that trying to resize an image with QMP block_resize doesn't +hang or otherwise fail when the image is attached to a device running in +an iothread. + +This is a regression test for the recent fix that changed +qmp_block_resize, which is a coroutine based QMP handler, to avoid +calling no_coroutine_fns directly. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20230509134133.373408-1-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++ + tests/qemu-iotests/tests/iothreads-resize.out | 11 +++ + 2 files changed, 82 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iothreads-resize + create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out + +diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize +new file mode 100755 +index 0000000000..36e4598c62 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-resize +@@ -0,0 +1,71 @@ ++#!/usr/bin/env bash ++# group: rw auto quick ++# ++# Test resizing an image that is attached to a separate iothread ++# ++# Copyright (C) 2023 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=kwolf@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. 
./common.filter ++ ++# Resizing images is only supported by a few block drivers ++_supported_fmt raw qcow2 qed ++_supported_proto file ++_require_devices virtio-scsi-pci ++ ++size=64M ++_make_test_img $size ++ ++qmp() { ++cat < +Date: Thu, 11 May 2023 13:03:22 +0200 +Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not + deprecated in RHEL + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm) + +This is a downstream-only patch that is necessary because the default +CPU in RHEL is marked as deprecated. This makes test cases fail due to +the warning in the output: + +qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max' + +Fixes: 318178778db60b6475d1484509bee136317156d3 +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/testenv.py | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py +index 9a37ad9152..963514aab3 100644 +--- a/tests/qemu-iotests/testenv.py ++++ b/tests/qemu-iotests/testenv.py +@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, + if self.qemu_prog.endswith(f'qemu-system-{suffix}'): + self.qemu_options += f' -machine {machine}' + ++ if self.qemu_prog.endswith('qemu-system-x86_64'): ++ self.qemu_options += ' -cpu Nehalem' ++ + # QEMU_DEFAULT_MACHINE + self.qemu_default_machine = get_default_machine(self.qemu_prog) + +-- +2.39.1 + diff --git a/SOURCES/kvm-iotests-iov-padding-New-test.patch b/SOURCES/kvm-iotests-iov-padding-New-test.patch new file mode 100644 index 0000000..9ef37a2 --- /dev/null +++ b/SOURCES/kvm-iotests-iov-padding-New-test.patch @@ -0,0 +1,186 @@ +From add833b5de202d6765dda56c8773985fbe7f40a6 Mon 
Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:18 +0200 +Subject: [PATCH 4/9] iotests/iov-padding: New test + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s) + +Test that even vectored IO requests with 1024 vector elements that are +not aligned to the device's request alignment will succeed. + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-5-hreitz@redhat.com> +(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d) +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++ + tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++ + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iov-padding + create mode 100644 tests/qemu-iotests/tests/iov-padding.out + +diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding +new file mode 100755 +index 0000000000..b9604900c7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iov-padding +@@ -0,0 +1,85 @@ ++#!/usr/bin/env bash ++# group: rw quick ++# ++# Check the interaction of request padding (to fit alignment restrictions) with ++# vectored I/O from the guest ++# ++# Copyright Red Hat ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++seq=$(basename $0) ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt raw ++_supported_proto file ++ ++_make_test_img 1M ++ ++IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG" ++ ++# Four combinations: ++# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k ++# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not ++# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned ++# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not ++for start_offset in 4096 512; do ++ for last_element_length in 512 4096; do ++ length=$((1023 * 512 + $last_element_length)) ++ ++ echo ++ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) ==" ++ ++ # Fill with data for testing ++ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io ++ ++ # 1023 512-byte buffers, and then one with length $last_element_length ++ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length" ++ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ ++ -c "writev $cmd_params" \ ++ --image-opts \ ++ "$IMGSPEC" \ ++ | _filter_qemu_io ++ ++ # Read all patterns -- read the part we just wrote with writev twice, ++ # once "normally", and once with a readv, so we see that that works, too ++ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ ++ -c "read -P 1 0 $start_offset" \ ++ -c "read -P 2 $start_offset $length" \ ++ -c "readv $cmd_params" \ ++ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \ ++ --image-opts \ ++ "$IMGSPEC" \ ++ | _filter_qemu_io ++ done 
++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out +new file mode 100644 +index 0000000000..e07a91fac7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iov-padding.out +@@ -0,0 +1,59 @@ ++QA output created by iov-padding ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++ ++== performing 1024-element vectored requests to image (offset: 4096; length: 524288) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 520192/520192 bytes at offset 528384 ++508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 4096; length: 527872) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 516608/516608 bytes at offset 531968 ++504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 512; length: 524288) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 
524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 523776/523776 bytes at offset 524800 ++511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 512; length: 527872) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 520192/520192 bytes at offset 528384 ++508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++*** done +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-Atomic-memslot-updates.patch b/SOURCES/kvm-kvm-Atomic-memslot-updates.patch deleted file mode 100644 index 14e9e32..0000000 --- a/SOURCES/kvm-kvm-Atomic-memslot-updates.patch +++ /dev/null @@ -1,286 +0,0 @@ -From e13fdc97ff05cdee46c112c2dee70b6ef33e7fa7 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:17:31 -0500 -Subject: [PATCH 31/31] kvm: Atomic memslot updates - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 9f03181ebcad2474fbe859acbce7b9891caa216b (eesposit/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39 -Author: David Hildenbrand -Date: Fri Nov 11 10:47:58 2022 -0500 - - kvm: Atomic memslot updates - - If we update an existing memslot (e.g., resize, split), we temporarily - remove the memslot to re-add it immediately afterwards. These updates - are not atomic, especially not for KVM VCPU threads, such that we can - get spurious faults. - - Let's inhibit most KVM ioctls while performing relevant updates, such - that we can perform the update just as if it would happen atomically - without additional kernel support. - - We capture the add/del changes and apply them in the notifier commit - stage instead. There, we can check for overlaps and perform the ioctl - inhibiting only if really required (-> overlap). - - To keep things simple we don't perform additional checks that wouldn't - actually result in an overlap -- such as !RAM memory regions in some - cases (see kvm_set_phys_mem()). - - To minimize cache-line bouncing, use a separate indicator - (in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock - while performing both actions (removing+re-adding). - - We have to wait until all IOCTLs were exited and block new ones from - getting executed. - - This approach cannot result in a deadlock as long as the inhibitor does - not hold any locks that might hinder an IOCTL from getting finished and - exited - something fairly unusual. The inhibitor will always hold the BQL. - - AFAIKs, one possible candidate would be userfaultfd. If a page cannot be - placed (e.g., during postcopy), because we're waiting for a lock, or if the - userfaultfd thread cannot process a fault, because it is waiting for a - lock, there could be a deadlock. However, the BQL is not applicable here, - because any other guest memory access while holding the BQL would already - result in a deadlock. 
- - Nothing else in the kernel should block forever and wait for userspace - intervention. - - Note: pause_all_vcpus()/resume_all_vcpus() or - start_exclusive()/end_exclusive() cannot be used, as they either drop - the BQL or require to be called without the BQL - something inhibitors - cannot handle. We need a low-level locking mechanism that is - deadlock-free even when not releasing the BQL. - - Signed-off-by: David Hildenbrand - Signed-off-by: Emanuele Giuseppe Esposito - Tested-by: Emanuele Giuseppe Esposito - Message-Id: <20221111154758.1372674-4-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++----- - include/sysemu/kvm_int.h | 8 ++++ - 2 files changed, 98 insertions(+), 11 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ff660fd469..39ed30ab59 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -31,6 +31,7 @@ - #include "sysemu/kvm_int.h" - #include "sysemu/runstate.h" - #include "sysemu/cpus.h" -+#include "sysemu/accel-blocker.h" - #include "qemu/bswap.h" - #include "exec/memory.h" - #include "exec/ram_addr.h" -@@ -46,6 +47,7 @@ - #include "sysemu/hw_accel.h" - #include "kvm-cpus.h" - #include "sysemu/dirtylimit.h" -+#include "qemu/range.h" - - #include "hw/boards.h" - #include "monitor/stats.h" -@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) - kvm_max_slot_size = max_slot_size; - } - -+/* Called with KVMMemoryListener.slots_lock held */ - static void kvm_set_phys_mem(KVMMemoryListener *kml, - MemoryRegionSection *section, bool add) - { -@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - ram = memory_region_get_ram_ptr(mr) + mr_offset; - ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; - -- kvm_slots_lock(); -- - if (!add) { - do { - slot_size = MIN(kvm_max_slot_size, size); - mem = kvm_lookup_matching_slot(kml, start_addr, 
slot_size); - if (!mem) { -- goto out; -+ return; - } - if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - /* -@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - start_addr += slot_size; - size -= slot_size; - } while (size); -- goto out; -+ return; - } - - /* register the new slot */ -@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - ram += slot_size; - size -= slot_size; - } while (size); -- --out: -- kvm_slots_unlock(); - } - - static void *kvm_dirty_ring_reaper_thread(void *data) -@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { - KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); -+ KVMMemoryUpdate *update; -+ -+ update = g_new0(KVMMemoryUpdate, 1); -+ update->section = *section; - -- memory_region_ref(section->mr); -- kvm_set_phys_mem(kml, section, true); -+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); - } - - static void kvm_region_del(MemoryListener *listener, - MemoryRegionSection *section) - { - KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); -+ KVMMemoryUpdate *update; -+ -+ update = g_new0(KVMMemoryUpdate, 1); -+ update->section = *section; -+ -+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); -+} -+ -+static void kvm_region_commit(MemoryListener *listener) -+{ -+ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, -+ listener); -+ KVMMemoryUpdate *u1, *u2; -+ bool need_inhibit = false; -+ -+ if (QSIMPLEQ_EMPTY(&kml->transaction_add) && -+ QSIMPLEQ_EMPTY(&kml->transaction_del)) { -+ return; -+ } -+ -+ /* -+ * We have to be careful when regions to add overlap with ranges to remove. -+ * We have to simulate atomic KVM memslot updates by making sure no ioctl() -+ * is currently active. -+ * -+ * The lists are order by addresses, so it's easy to find overlaps. 
-+ */ -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); -+ u2 = QSIMPLEQ_FIRST(&kml->transaction_add); -+ while (u1 && u2) { -+ Range r1, r2; -+ -+ range_init_nofail(&r1, u1->section.offset_within_address_space, -+ int128_get64(u1->section.size)); -+ range_init_nofail(&r2, u2->section.offset_within_address_space, -+ int128_get64(u2->section.size)); -+ -+ if (range_overlaps_range(&r1, &r2)) { -+ need_inhibit = true; -+ break; -+ } -+ if (range_lob(&r1) < range_lob(&r2)) { -+ u1 = QSIMPLEQ_NEXT(u1, next); -+ } else { -+ u2 = QSIMPLEQ_NEXT(u2, next); -+ } -+ } -+ -+ kvm_slots_lock(); -+ if (need_inhibit) { -+ accel_ioctl_inhibit_begin(); -+ } -+ -+ /* Remove all memslots before adding the new ones. */ -+ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) { -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); -+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next); - -- kvm_set_phys_mem(kml, section, false); -- memory_region_unref(section->mr); -+ kvm_set_phys_mem(kml, &u1->section, false); -+ memory_region_unref(u1->section.mr); -+ -+ g_free(u1); -+ } -+ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) { -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_add); -+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next); -+ -+ memory_region_ref(u1->section.mr); -+ kvm_set_phys_mem(kml, &u1->section, true); -+ -+ g_free(u1); -+ } -+ -+ if (need_inhibit) { -+ accel_ioctl_inhibit_end(); -+ } -+ kvm_slots_unlock(); - } - - static void kvm_log_sync(MemoryListener *listener, -@@ -1610,8 +1685,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, - kml->slots[i].slot = i; - } - -+ QSIMPLEQ_INIT(&kml->transaction_add); -+ QSIMPLEQ_INIT(&kml->transaction_del); -+ - kml->listener.region_add = kvm_region_add; - kml->listener.region_del = kvm_region_del; -+ kml->listener.commit = kvm_region_commit; - kml->listener.log_start = kvm_log_start; - kml->listener.log_stop = kvm_log_stop; - kml->listener.priority = 10; -diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h -index 
3b4adcdc10..60b520a13e 100644 ---- a/include/sysemu/kvm_int.h -+++ b/include/sysemu/kvm_int.h -@@ -12,6 +12,7 @@ - #include "exec/memory.h" - #include "qapi/qapi-types-common.h" - #include "qemu/accel.h" -+#include "qemu/queue.h" - #include "sysemu/kvm.h" - - typedef struct KVMSlot -@@ -31,10 +32,17 @@ typedef struct KVMSlot - ram_addr_t ram_start_offset; - } KVMSlot; - -+typedef struct KVMMemoryUpdate { -+ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; -+ MemoryRegionSection section; -+} KVMMemoryUpdate; -+ - typedef struct KVMMemoryListener { - MemoryListener listener; - KVMSlot *slots; - int as_id; -+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; -+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; - } KVMMemoryListener; - - #define KVM_MSI_HASHTAB_SIZE 256 --- -2.31.1 - diff --git a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch b/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch index 16db441..d6a6d73 100644 --- a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +++ b/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch @@ -1,15 +1,20 @@ -From 6319eaee8c2206c4eca858a11ed7c9b7a2f3dff9 Mon Sep 17 00:00:00 2001 +From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti -Date: Thu, 29 Jun 2023 15:13:57 -0300 -Subject: [PATCH] kvm: reuse per-vcpu stats fd to avoid vcpu interruption +Date: Thu, 29 Jun 2023 14:48:32 -0300 +Subject: [PATCH 5/6] kvm: reuse per-vcpu stats fd to avoid vcpu interruption +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit RH-Author: Marcelo Tosatti -RH-MergeRequest: 290: kvm: reuse per-vcpu stats fd to avoid vcpu interruption -RH-Bugzilla: 2221219 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] cf6c3188e26c6eae99b48db1f75837e11d1e4489 +RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption +RH-Bugzilla: 2218644 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Paolo 
Bonzini +RH-Acked-by: Leonardo Brás +RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214884 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644 Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d A regression has been detected in latency testing of KVM guests. @@ -32,7 +37,7 @@ Signed-off-by: Paolo Bonzini 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 39ed30ab59..c86a6798c6 100644 +index cf3a88d90e..fa7ca46c66 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) @@ -44,7 +49,7 @@ index 39ed30ab59..c86a6798c6 100644 err: return ret; } -@@ -3950,7 +3952,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd +@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd /* Read stats header */ kvm_stats_header = &descriptors->kvm_stats_header; @@ -53,7 +58,7 @@ index 39ed30ab59..c86a6798c6 100644 if (ret != sizeof(*kvm_stats_header)) { error_setg(errp, "KVM stats: failed to read stats header: " "expected %zu actual %zu", -@@ -3981,7 +3983,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd +@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd } static void query_stats(StatsResultList **result, StatsTarget target, @@ -63,7 +68,7 @@ index 39ed30ab59..c86a6798c6 100644 { struct kvm_stats_desc *kvm_stats_desc; struct kvm_stats_header *kvm_stats_header; -@@ -4039,7 +4042,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, +@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, break; case STATS_TARGET_VCPU: add_stats_entry(result, STATS_PROVIDER_KVM, @@ -72,7 +77,7 @@ index 39ed30ab59..c86a6798c6 100644 stats_list); break; default: 
-@@ -4076,10 +4079,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, +@@ -4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list); } @@ -85,7 +90,7 @@ index 39ed30ab59..c86a6798c6 100644 Error *local_err = NULL; if (stats_fd == -1) { -@@ -4088,14 +4090,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) +@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) return; } query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU, @@ -104,7 +109,7 @@ index 39ed30ab59..c86a6798c6 100644 Error *local_err = NULL; if (stats_fd == -1) { -@@ -4105,7 +4106,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) +@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) } query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd, kvm_stats_args->errp); @@ -112,7 +117,7 @@ index 39ed30ab59..c86a6798c6 100644 } static void query_stats_cb(StatsResultList **result, StatsTarget target, -@@ -4123,7 +4123,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, +@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, error_setg_errno(errp, errno, "KVM stats: ioctl failed"); return; } @@ -121,7 +126,7 @@ index 39ed30ab59..c86a6798c6 100644 close(stats_fd); break; } -@@ -4137,7 +4137,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, +@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) { continue; } @@ -130,7 +135,7 @@ index 39ed30ab59..c86a6798c6 100644 } break; } -@@ -4163,6 +4163,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) +@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList 
**result, Error **errp) if (first_cpu) { stats_args.result.schema = result; stats_args.errp = errp; @@ -139,10 +144,10 @@ index 39ed30ab59..c86a6798c6 100644 } } diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 2417597236..362f22ca06 100644 +index 397fd3ac68..ae96be07e7 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h -@@ -397,6 +397,7 @@ struct CPUState { +@@ -399,6 +399,7 @@ struct CPUState { struct kvm_dirty_gfn *kvm_dirty_gfns; uint32_t kvm_fetch_index; uint64_t dirty_pages; diff --git a/SOURCES/kvm-linux-headers-Update-to-v6.1.patch b/SOURCES/kvm-linux-headers-Update-to-v6.1.patch deleted file mode 100644 index 6ce9c7d..0000000 --- a/SOURCES/kvm-linux-headers-Update-to-v6.1.patch +++ /dev/null @@ -1,577 +0,0 @@ -From cbe35c6a4794107ea1ddecf0b381ba4b1c8799f5 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 7 Feb 2023 15:57:10 -0500 -Subject: [PATCH 3/8] linux-headers: Update to v6.1 - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 15d97026e802a0f01b5f80f81fb4414dc69b2b2d (peterx/qemu-kvm) - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Cornelia Huck -Signed-off-by: Juan Quintela -(cherry picked from commit 93e0932b7be2498024cd6ba8446a0fa2cb1769bc) -Signed-off-by: Peter Xu ---- - include/standard-headers/drm/drm_fourcc.h | 34 ++++- - include/standard-headers/linux/ethtool.h | 63 +++++++- - include/standard-headers/linux/fuse.h | 6 +- - .../linux/input-event-codes.h | 1 + - include/standard-headers/linux/virtio_blk.h | 19 +++ - linux-headers/asm-generic/hugetlb_encode.h | 26 ++-- - linux-headers/asm-generic/mman-common.h | 2 + - linux-headers/asm-mips/mman.h | 2 + - linux-headers/asm-riscv/kvm.h | 4 + - linux-headers/linux/kvm.h | 1 + - linux-headers/linux/psci.h | 14 ++ - linux-headers/linux/userfaultfd.h | 4 + - linux-headers/linux/vfio.h | 142 ++++++++++++++++++ - 13 files changed, 298 insertions(+), 20 deletions(-) - -diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h -index 48b620cbef..b868488f93 100644 ---- a/include/standard-headers/drm/drm_fourcc.h -+++ b/include/standard-headers/drm/drm_fourcc.h -@@ -98,18 +98,42 @@ extern "C" { - #define DRM_FORMAT_INVALID 0 - - /* color index */ -+#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */ -+#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ - #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ - --/* 8 bpp Red */ -+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+ -+/* 2 bpp Darkness (inverse 
relationship between channel value and brightness) */ -+#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */ -+ -+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ -+ -+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ -+ -+/* 1 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+ -+/* 2 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */ -+ -+/* 4 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */ -+ -+/* 8 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ - --/* 10 bpp Red */ -+/* 10 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ - --/* 12 bpp Red */ -+/* 12 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ - --/* 16 bpp Red */ -+/* 16 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ - - /* 16 bpp RG */ -@@ -204,7 +228,9 @@ extern "C" { - #define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ - - #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') 
/* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ -+#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */ - #define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ -+#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */ - #define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ - #define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ - -diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h -index 4537da20cc..1dc56cdc0a 100644 ---- a/include/standard-headers/linux/ethtool.h -+++ b/include/standard-headers/linux/ethtool.h -@@ -736,6 +736,51 @@ enum ethtool_module_power_mode { - ETHTOOL_MODULE_POWER_MODE_HIGH, - }; - -+/** -+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE -+ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are -+ * unknown -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled -+ */ -+enum ethtool_podl_pse_admin_state { -+ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, -+ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, -+ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, -+}; -+ -+/** -+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. 
-+ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is -+ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is -+ * false" -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is -+ * asserted true when either of the PSE state diagram variables -+ * pi_detecting or pi_classifying is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” -+ * is asserted true when the PoDL PSE state diagram variable pi_powered is -+ * true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted -+ * true when the PoDL PSE state diagram variable pi_sleeping is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true -+ * when the logical combination of the PoDL PSE state diagram variables -+ * pi_prebiased*!pi_sleeping is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted -+ * true when the PoDL PSE state diagram variable overload_held is true." -+ */ -+enum ethtool_podl_pse_pw_d_status { -+ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, -+}; -+ - /** - * struct ethtool_gstrings - string set for data tagging - * @cmd: Command number = %ETHTOOL_GSTRINGS -@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex) - #define MASTER_SLAVE_STATE_SLAVE 3 - #define MASTER_SLAVE_STATE_ERR 4 - -+/* These are used to throttle the rate of data on the phy interface when the -+ * native speed of the interface is higher than the link speed. These should -+ * not be used for phy interfaces which natively support multiple speeds (e.g. 
-+ * MII or SGMII). -+ */ -+/* No rate matching performed. */ -+#define RATE_MATCH_NONE 0 -+/* The phy sends pause frames to throttle the MAC. */ -+#define RATE_MATCH_PAUSE 1 -+/* The phy asserts CRS to prevent the MAC from transmitting. */ -+#define RATE_MATCH_CRS 2 -+/* The MAC is programmed with a sufficiently-large IPG. */ -+#define RATE_MATCH_OPEN_LOOP 3 -+ - /* Which connector port. */ - #define PORT_TP 0x00 - #define PORT_AUI 0x01 -@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags { - * reported consistently by PHYLIB. Read-only. - * @master_slave_cfg: Master/slave port mode. - * @master_slave_state: Master/slave port state. -+ * @rate_matching: Rate adaptation performed by the PHY - * @reserved: Reserved for future use; see the note on reserved space. -- * @reserved1: Reserved for future use; see the note on reserved space. - * @link_mode_masks: Variable length bitmaps. - * - * If autonegotiation is disabled, the speed and @duplex represent the -@@ -2085,7 +2144,7 @@ struct ethtool_link_settings { - uint8_t transceiver; - uint8_t master_slave_cfg; - uint8_t master_slave_state; -- uint8_t reserved1[1]; -+ uint8_t rate_matching; - uint32_t reserved[7]; - uint32_t link_mode_masks[]; - /* layout of link_mode_masks fields: -diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h -index bda06258be..713d259768 100644 ---- a/include/standard-headers/linux/fuse.h -+++ b/include/standard-headers/linux/fuse.h -@@ -194,6 +194,9 @@ - * - add FUSE_SECURITY_CTX init flag - * - add security context to create, mkdir, symlink, and mknod requests - * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX -+ * -+ * 7.37 -+ * - add FUSE_TMPFILE - */ - - #ifndef _LINUX_FUSE_H -@@ -225,7 +228,7 @@ - #define FUSE_KERNEL_VERSION 7 - - /** Minor version number of this interface */ --#define FUSE_KERNEL_MINOR_VERSION 36 -+#define FUSE_KERNEL_MINOR_VERSION 37 - - /** The node ID of the root inode */ - #define FUSE_ROOT_ID 1 -@@ -533,6 +536,7 @@ enum fuse_opcode { - 
FUSE_SETUPMAPPING = 48, - FUSE_REMOVEMAPPING = 49, - FUSE_SYNCFS = 50, -+ FUSE_TMPFILE = 51, - - /* CUSE specific operations */ - CUSE_INIT = 4096, -diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h -index 50790aee5a..815f7a1dff 100644 ---- a/include/standard-headers/linux/input-event-codes.h -+++ b/include/standard-headers/linux/input-event-codes.h -@@ -862,6 +862,7 @@ - #define ABS_TOOL_WIDTH 0x1c - - #define ABS_VOLUME 0x20 -+#define ABS_PROFILE 0x21 - - #define ABS_MISC 0x28 - -diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h -index 2dcc90826a..e81715cd70 100644 ---- a/include/standard-headers/linux/virtio_blk.h -+++ b/include/standard-headers/linux/virtio_blk.h -@@ -40,6 +40,7 @@ - #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ - #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ - #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ -+#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ - - /* Legacy feature bits */ - #ifndef VIRTIO_BLK_NO_LEGACY -@@ -119,6 +120,21 @@ struct virtio_blk_config { - uint8_t write_zeroes_may_unmap; - - uint8_t unused1[3]; -+ -+ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ -+ /* -+ * The maximum secure erase sectors (in 512-byte sectors) for -+ * one segment. -+ */ -+ __virtio32 max_secure_erase_sectors; -+ /* -+ * The maximum number of secure erase segments in a -+ * secure erase command. -+ */ -+ __virtio32 max_secure_erase_seg; -+ /* Secure erase commands must be aligned to this number of sectors. */ -+ __virtio32 secure_erase_sector_alignment; -+ - } QEMU_PACKED; - - /* -@@ -153,6 +169,9 @@ struct virtio_blk_config { - /* Write zeroes command */ - #define VIRTIO_BLK_T_WRITE_ZEROES 13 - -+/* Secure erase command */ -+#define VIRTIO_BLK_T_SECURE_ERASE 14 -+ - #ifndef VIRTIO_BLK_NO_LEGACY - /* Barrier before this op. 
*/ - #define VIRTIO_BLK_T_BARRIER 0x80000000 -diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h -index 4f3d5aaa11..de687009bf 100644 ---- a/linux-headers/asm-generic/hugetlb_encode.h -+++ b/linux-headers/asm-generic/hugetlb_encode.h -@@ -20,18 +20,18 @@ - #define HUGETLB_FLAG_ENCODE_SHIFT 26 - #define HUGETLB_FLAG_ENCODE_MASK 0x3f - --#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define 
HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT) - - #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ -diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h -index 6c1aa92a92..6ce1f1ceb4 100644 ---- a/linux-headers/asm-generic/mman-common.h -+++ b/linux-headers/asm-generic/mman-common.h -@@ -77,6 +77,8 @@ - - #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ - -+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ -+ - /* compatibility flags */ - #define MAP_FILE 0 - -diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h -index 1be428663c..c6e1fc77c9 100644 ---- a/linux-headers/asm-mips/mman.h -+++ b/linux-headers/asm-mips/mman.h -@@ -103,6 +103,8 @@ - - #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ - -+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ -+ - /* compatibility flags */ - #define MAP_FILE 0 - -diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h -index 7351417afd..8985ff234c 100644 ---- a/linux-headers/asm-riscv/kvm.h -+++ b/linux-headers/asm-riscv/kvm.h -@@ -48,6 +48,7 @@ struct kvm_sregs { - /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ - struct kvm_riscv_config { - unsigned long isa; -+ unsigned long zicbom_block_size; - }; - - /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ -@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID { - KVM_RISCV_ISA_EXT_M, - KVM_RISCV_ISA_EXT_SVPBMT, - KVM_RISCV_ISA_EXT_SSTC, -+ KVM_RISCV_ISA_EXT_SVINVAL, -+ KVM_RISCV_ISA_EXT_ZIHINTPAUSE, -+ KVM_RISCV_ISA_EXT_ZICBOM, - KVM_RISCV_ISA_EXT_MAX, - }; - -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index ebdafa576d..b2783c5202 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -1175,6 +1175,7 @@ struct 
kvm_ppc_resize_hpt { - #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 - #define KVM_CAP_S390_ZPCI_OP 221 - #define KVM_CAP_S390_CPU_TOPOLOGY 222 -+#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 - - #ifdef KVM_CAP_IRQ_ROUTING - -diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h -index 213b2a0f70..e60dfd8907 100644 ---- a/linux-headers/linux/psci.h -+++ b/linux-headers/linux/psci.h -@@ -48,12 +48,26 @@ - #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) - - #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) -+#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11) -+#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12) -+#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13) - #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) - #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) -+#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16) -+#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17) -+ - #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) -+#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) -+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) - -+#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) -+#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) - #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) -+#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16) -+#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) -+ - #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) -+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) - - /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ - #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff -diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h -index a3a377cd44..ba5d0df52f 100644 ---- a/linux-headers/linux/userfaultfd.h -+++ b/linux-headers/linux/userfaultfd.h -@@ -12,6 +12,10 @@ - - #include - -+/* ioctls for /dev/userfaultfd */ -+#define USERFAULTFD_IOC 0xAA -+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) -+ - /* - * If the UFFDIO_API is upgraded 
someday, the UFFDIO_UNREGISTER and - * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index ede44b5572..bee7e42198 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -986,6 +986,148 @@ enum vfio_device_mig_state { - VFIO_DEVICE_STATE_RUNNING_P2P = 5, - }; - -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power -+ * state with the platform-based power management. Device use of lower power -+ * states depends on factors managed by the runtime power management core, -+ * including system level support and coordinating support among dependent -+ * devices. Enabling device low power entry does not guarantee lower power -+ * usage by the device, nor is a mechanism provided through this feature to -+ * know the current power state of the device. If any device access happens -+ * (either from the host or through the vfio uAPI) when the device is in the -+ * low power state, then the host will move the device out of the low power -+ * state as necessary prior to the access. Once the access is completed, the -+ * device may re-enter the low power state. For single shot low power support -+ * with wake-up notification, see -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd -+ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after -+ * calling LOW_POWER_EXIT. -+ */ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 -+ -+/* -+ * This device feature has the same behavior as -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user -+ * provides an eventfd for wake-up notification. When the device moves out of -+ * the low power state for the wake-up, the host will not allow the device to -+ * re-enter a low power state without a subsequent user call to one of the low -+ * power entry device feature IOCTLs. 
Access to mmap'd device regions is -+ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the -+ * low power exit. The low power exit can happen either through LOW_POWER_EXIT -+ * or through any other access (where the wake-up notification has been -+ * generated). The access to mmap'd device regions will not trigger low power -+ * exit. -+ * -+ * The notification through the provided eventfd will be generated only when -+ * the device has entered and is resumed from a low power state after -+ * calling this device feature IOCTL. A device that has not entered low power -+ * state, as managed through the runtime power management core, will not -+ * generate a notification through the provided eventfd on access. Calling the -+ * LOW_POWER_EXIT feature is optional in the case where notification has been -+ * signaled on the provided eventfd that a resume from low power has occurred. -+ */ -+struct vfio_device_low_power_entry_with_wakeup { -+ __s32 wakeup_eventfd; -+ __u32 reserved; -+}; -+ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as -+ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. -+ * This device feature IOCTL may itself generate a wakeup eventfd notification -+ * in the latter case if the device had previously entered a low power state. -+ */ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. -+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports -+ * DMA logging. -+ * -+ * DMA logging allows a device to internally record what DMAs the device is -+ * initiating and report them back to userspace. It is part of the VFIO -+ * migration infrastructure that allows implementing dirty page tracking -+ * during the pre copy phase of live migration. 
Only DMA WRITEs are logged, -+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. -+ * -+ * When DMA logging is started a range of IOVAs to monitor is provided and the -+ * device can optimize its logging to cover only the IOVA range given. Each -+ * DMA that the device initiates inside the range will be logged by the device -+ * for later retrieval. -+ * -+ * page_size is an input that hints what tracking granularity the device -+ * should try to achieve. If the device cannot do the hinted page size then -+ * it's the driver choice which page size to pick based on its support. -+ * On output the device will return the page size it selected. -+ * -+ * ranges is a pointer to an array of -+ * struct vfio_device_feature_dma_logging_range. -+ * -+ * The core kernel code guarantees to support by minimum num_ranges that fit -+ * into a single kernel page. User space can try higher values but should give -+ * up if the above can't be achieved as of some driver limitations. -+ * -+ * A single call to start device DMA logging can be issued and a matching stop -+ * should follow at the end. Another start is not allowed in the meantime. -+ */ -+struct vfio_device_feature_dma_logging_control { -+ __aligned_u64 page_size; -+ __u32 num_ranges; -+ __u32 __reserved; -+ __aligned_u64 ranges; -+}; -+ -+struct vfio_device_feature_dma_logging_range { -+ __aligned_u64 iova; -+ __aligned_u64 length; -+}; -+ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started -+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START -+ */ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log -+ * -+ * Query the device's DMA log for written pages within the given IOVA range. -+ * During querying the log is cleared for the IOVA range. 
-+ * -+ * bitmap is a pointer to an array of u64s that will hold the output bitmap -+ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits -+ * is given by: -+ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) -+ * -+ * The input page_size can be any power of two value and does not have to -+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver -+ * will format its internal logging to match the reporting page size, possibly -+ * by replicating bits if the internal page size is lower than requested. -+ * -+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or -+ * perform any initialization of the user provided bitmap. -+ * -+ * If any error is returned userspace should assume that the dirty log is -+ * corrupted. Error recovery is to consider all memory dirty and try to -+ * restart the dirty tracking, or to abort/restart the whole migration. -+ * -+ * If DMA logging is not enabled, an error will be returned. -+ * -+ */ -+struct vfio_device_feature_dma_logging_report { -+ __aligned_u64 iova; -+ __aligned_u64 length; -+ __aligned_u64 page_size; -+ __aligned_u64 bitmap; -+}; -+ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 -+ - /* -------- API for Type1 VFIO IOMMU -------- */ - - /** --- -2.31.1 - diff --git a/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch b/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch new file mode 100644 index 0000000..c1100a5 --- /dev/null +++ b/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch @@ -0,0 +1,53 @@ +From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/13] 
02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 6d0589e0e6c64b888864a2bf980537be20389264 +Author: Alexander Bulekov +Date: Sat May 6 07:21:45 2023 -0400 + + loongarch: mark loongarch_ipi_iocsr re-entrnacy safe + + loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send + function. As such, mark these MRs re-entrancy-safe. + + Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues") + Signed-off-by: Alexander Bulekov + Reviewed-by: Song Gao + Message-Id: <20230506112145.3563708-1-alxndr@bu.edu> + Signed-off-by: Song Gao + +Signed-off-by: Jon Maloy +--- + hw/intc/loongarch_ipi.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index aa4bf9eb74..40e98af2ce 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj) + for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) { + memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops, + &lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48); ++ ++ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ ++ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true; ++ + sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]); + + memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops, +-- +2.39.3 + diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch new file mode 100644 index 0000000..359d53f --- /dev/null +++ b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch @@ -0,0 +1,70 @@ +From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 29 May 2023 14:21:08 -0400 +Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO + region, too + +RH-Author: Jon Maloy 
+RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e +Author: Thomas Huth +Date: Tue May 16 11:05:56 2023 +0200 + + lsi53c895a: disable reentrancy detection for MMIO region, too + + While trying to use a SCSI disk on the LSI controller with an + older version of Fedora (25), I'm getting: + + qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34 + + and the SCSI controller is not usable. Seems like we have to + disable the reentrancy checker for the MMIO region, too, to + get this working again. + + The problem could be reproduced it like this: + + ./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \ + -device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \ + -drive if=none,id=d0,file=.../somedisk.qcow2 \ + -cdrom Fedora-Everything-netinst-i386-25-1.3.iso + + Where somedisk.qcow2 is an image that contains already some partitions + and file systems. + + In the boot menu of Fedora, go to + "Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell" + + Then check "dmesg | grep -i 53c" for failure messages, and try to mount + a partition from somedisk.qcow2. + + Message-Id: <20230516090556.553813-1-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index db27872963..048436352b 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) + * re-entrancy guard. 
+ */ + s->ram_io.disable_reentrancy_guard = true; ++ s->mmio_io.disable_reentrancy_guard = true; + + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + qdev_init_gpio_out(d, &s->ext_irq, 1); +-- +2.39.3 + diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch new file mode 100644 index 0000000..e671c92 --- /dev/null +++ b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch @@ -0,0 +1,58 @@ +From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:10 2023 -0400 + + lsi53c895a: disable reentrancy detection for script RAM + + As the code is designed to use the memory APIs to access the script ram, + disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion. + + In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion. 
+ + Reported-by: Fiona Ebner + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-6-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index af93557a9a..db27872963 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) + memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, + "lsi-io", 256); + ++ /* ++ * Since we use the address-space API to interact with ram_io, disable the ++ * re-entrancy guard. ++ */ ++ s->ram_io.disable_reentrancy_guard = true; ++ + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + qdev_init_gpio_out(d, &s->ext_irq, 1); + +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch b/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch new file mode 100644 index 0000000..d3697dc --- /dev/null +++ b/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch @@ -0,0 +1,150 @@ +From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 05/21] memory: prevent dma-reentracy issues + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 +CVE: CVE-2023-0330 + +commit a2e1753b8054344f32cf94f31c6399a58794a380 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:06 2023 -0400 + + memory: prevent dma-reentracy issues + + Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. 
+ This flag is set/checked prior to calling a device's MemoryRegion + handlers, and set when device code initiates DMA. The purpose of this + flag is to prevent two types of DMA-based reentrancy issues: + + 1.) mmio -> dma -> mmio case + 2.) bh -> dma write -> mmio case + + These issues have led to problems such as stack-exhaustion and + use-after-frees. + + Summary of the problem from Peter Maydell: + https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com + + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 + Resolves: CVE-2023-0330 + + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> + [thuth: Replace warn_report() with warn_report_once()] + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + include/exec/memory.h | 5 +++++ + include/hw/qdev-core.h | 7 +++++++ + softmmu/memory.c | 16 ++++++++++++++++ + 3 files changed, 28 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 15ade918ba..e45ce6061f 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -767,6 +767,8 @@ struct MemoryRegion { + bool is_iommu; + RAMBlock *ram_block; + Object *owner; ++ /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ ++ DeviceState *dev; + + const MemoryRegionOps *ops; + void *opaque; +@@ -791,6 +793,9 @@ struct MemoryRegion { + unsigned ioeventfd_nb; + MemoryRegionIoeventfd *ioeventfds; + RamDiscardManager *rdm; /* Only for RAM */ ++ ++ /* For devices designed to perform re-entrant IO into their own IO MRs */ ++ bool disable_reentrancy_guard; + }; + + struct IOMMUMemoryRegion { +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index bd50ad5ee1..7623703943 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -162,6 +162,10 @@ struct NamedClockList { + QLIST_ENTRY(NamedClockList) node; + }; + ++typedef struct { ++ bool engaged_in_io; ++} MemReentrancyGuard; ++ + /** + * DeviceState: + * @realized: Indicates whether the device has been fully constructed. +@@ -194,6 +198,9 @@ struct DeviceState { + int alias_required_for_version; + ResettableState reset; + GSList *unplug_blockers; ++ ++ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ ++ MemReentrancyGuard mem_reentrancy_guard; + }; + + struct DeviceListener { +diff --git a/softmmu/memory.c b/softmmu/memory.c +index b1a6cae6f5..b7b3386e9d 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_size_max = 4; + } + ++ /* Do not allow more than one simultaneous access to a device's IO Regions */ ++ if (mr->dev && !mr->disable_reentrancy_guard && ++ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { ++ if (mr->dev->mem_reentrancy_guard.engaged_in_io) { ++ warn_report_once("Blocked re-entrant IO on MemoryRegion: " ++ "%s at addr: 0x%" HWADDR_PRIX, ++ memory_region_name(mr), addr); ++ return MEMTX_ACCESS_ERROR; ++ } ++ mr->dev->mem_reentrancy_guard.engaged_in_io = true; ++ } ++ + /* FIXME: support unaligned access? 
*/ + access_size = MAX(MIN(size, access_size_max), access_size_min); + access_mask = MAKE_64BIT_MASK(0, access_size * 8); +@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_mask, attrs); + } + } ++ if (mr->dev) { ++ mr->dev->mem_reentrancy_guard.engaged_in_io = false; ++ } + return r; + } + +@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, + } + mr->name = g_strdup(name); + mr->owner = owner; ++ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); + mr->ram_block = NULL; + + if (name) { +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch b/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch new file mode 100644 index 0000000..f45abea --- /dev/null +++ b/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch @@ -0,0 +1,67 @@ +From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 7 Jun 2023 11:45:09 -0400 +Subject: [PATCH 15/21] memory: stricter checks prior to unsetting + engaged_in_io + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3 +Author: Alexander Bulekov +Date: Tue May 16 04:40:02 2023 -0400 + + memory: stricter checks prior to unsetting engaged_in_io + + engaged_in_io could be unset by an MR with re-entrancy checks disabled. + Ensure that only MRs that can set the engaged_in_io flag can unset it. 
+ + Signed-off-by: Alexander Bulekov + Message-Id: <20230516084002.3813836-1-alxndr@bu.edu> + Reviewed-by: Darren Kenny + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + softmmu/memory.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/softmmu/memory.c b/softmmu/memory.c +index b7b3386e9d..26424f1d78 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + unsigned access_size; + unsigned i; + MemTxResult r = MEMTX_OK; ++ bool reentrancy_guard_applied = false; + + if (!access_size_min) { + access_size_min = 1; +@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; ++ reentrancy_guard_applied = true; + } + + /* FIXME: support unaligned access? */ +@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_mask, attrs); + } + } +- if (mr->dev) { ++ if (mr->dev && reentrancy_guard_applied) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } + return r; +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch b/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch new file mode 100644 index 0000000..b94ba7c --- /dev/null +++ b/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch @@ -0,0 +1,186 @@ +From d831672c4f1d41d863823584173452b89e754e26 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 11 Sep 2023 16:10:19 +0200 +Subject: [PATCH 3/4] migration: Add .save_prepare() handler to struct + SaveVMHandlers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled +RH-Bugzilla: 2229868 +RH-Acked-by: Alex Williamson +RH-Acked-by: Peter Xu +RH-Commit: [3/4] 
b3154a736764ae4430561d7f5c298ab4c6ef9e01 + +Bugzilla: https://bugzilla.redhat.com/2229868 + +commit 08fc4cb51774f763dcc6fd74637aa9e00eb6a0ba +Author: Avihai Horon +Date: Wed Sep 6 18:08:51 2023 +0300 + + migration: Add .save_prepare() handler to struct SaveVMHandlers + + Add a new .save_prepare() handler to struct SaveVMHandlers. This handler + is called early, even before migration starts, and can be used by + devices to perform early checks. + + Refactor migrate_init() to be able to return errors and call + .save_prepare() from there. + + Suggested-by: Peter Xu + Signed-off-by: Avihai Horon + Reviewed-by: Peter Xu + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c + context change in migrate_init() due to missing commit + aff3f6606d14 ("migration: Rename ram_counters to mig_stats") + context change in migrate_prepare() due to missing commit + 87c22901094a ("migration: Move migrate_set_block_incremental() + to options.c") + +Signed-off-by: Cédric Le Goater +--- + include/migration/register.h | 5 +++++ + migration/migration.c | 15 +++++++++++++-- + migration/migration.h | 2 +- + migration/savevm.c | 29 ++++++++++++++++++++++++++++- + migration/savevm.h | 1 + + 5 files changed, 48 insertions(+), 4 deletions(-) + +diff --git a/include/migration/register.h b/include/migration/register.h +index 90914f32f5..2b12c6adec 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -20,6 +20,11 @@ typedef struct SaveVMHandlers { + /* This runs inside the iothread lock. */ + SaveStateHandler *save_state; + ++ /* ++ * save_prepare is called early, even before migration starts, and can be ++ * used to perform early checks. 
++ */ ++ int (*save_prepare)(void *opaque, Error **errp); + void (*save_cleanup)(void *opaque); + int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); + int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); +diff --git a/migration/migration.c b/migration/migration.c +index a85c8936d9..cdaa757e23 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1389,8 +1389,15 @@ bool migration_is_active(MigrationState *s) + s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); + } + +-void migrate_init(MigrationState *s) ++int migrate_init(MigrationState *s, Error **errp) + { ++ int ret; ++ ++ ret = qemu_savevm_state_prepare(errp); ++ if (ret) { ++ return ret; ++ } ++ + /* + * Reinitialise all migration state, except + * parameters/capabilities that the user set, and +@@ -1429,6 +1436,8 @@ void migrate_init(MigrationState *s) + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); + migration_reset_vfio_bytes_transferred(); ++ ++ return 0; + } + + int migrate_add_blocker_internal(Error *reason, Error **errp) +@@ -1638,7 +1647,9 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + migrate_set_block_incremental(s, true); + } + +- migrate_init(s); ++ if (migrate_init(s, errp)) { ++ return false; ++ } + + return true; + } +diff --git a/migration/migration.h b/migration/migration.h +index c5b98485e3..cfbe7c390d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -465,7 +465,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in); + bool migration_is_setup_or_active(int state); + bool migration_is_running(int state); + +-void migrate_init(MigrationState *s); ++int migrate_init(MigrationState *s, Error **errp); + bool migration_is_blocked(Error **errp); + /* True if outgoing migration has entered postcopy phase */ + bool migration_in_postcopy(void); +diff --git a/migration/savevm.c b/migration/savevm.c +index 13c1a9afa1..2913563d6e 100644 +--- 
a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1231,6 +1231,30 @@ bool qemu_savevm_state_guest_unplug_pending(void) + return false; + } + ++int qemu_savevm_state_prepare(Error **errp) ++{ ++ SaveStateEntry *se; ++ int ret; ++ ++ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { ++ if (!se->ops || !se->ops->save_prepare) { ++ continue; ++ } ++ if (se->ops->is_active) { ++ if (!se->ops->is_active(se->opaque)) { ++ continue; ++ } ++ } ++ ++ ret = se->ops->save_prepare(se->opaque, errp); ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ + void qemu_savevm_state_setup(QEMUFile *f) + { + MigrationState *ms = migrate_get_current(); +@@ -1617,7 +1641,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + return -EINVAL; + } + +- migrate_init(ms); ++ ret = migrate_init(ms, errp); ++ if (ret) { ++ return ret; ++ } + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +diff --git a/migration/savevm.h b/migration/savevm.h +index e894bbc143..74669733dd 100644 +--- a/migration/savevm.h ++++ b/migration/savevm.h +@@ -31,6 +31,7 @@ + + bool qemu_savevm_state_blocked(Error **errp); + void qemu_savevm_non_migratable_list(strList **reasons); ++int qemu_savevm_state_prepare(Error **errp); + void qemu_savevm_state_setup(QEMUFile *f); + bool qemu_savevm_state_guest_unplug_pending(void); + int qemu_savevm_state_resume_prepare(MigrationState *s); +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch b/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch new file mode 100644 index 0000000..5cba9f4 --- /dev/null +++ b/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch @@ -0,0 +1,139 @@ +From f053185a7fb9fab2a41c0a5ae4e1a403bc99a9a0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 11 Sep 2023 16:10:19 +0200 +Subject: [PATCH 1/4] migration: Add migration prefix to functions in target.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled +RH-Bugzilla: 2229868 +RH-Acked-by: Alex Williamson +RH-Acked-by: Peter Xu +RH-Commit: [1/4] 4594d2035423385690d7f1feb5f2e4c8f0be74f5 + +Bugzilla: https://bugzilla.redhat.com/2229868 + +commit 38c482b4778595ee337761f73ec0730d6c47b404 +Author: Avihai Horon +Date: Wed Sep 6 18:08:48 2023 +0300 + + migration: Add migration prefix to functions in target.c + + The functions in target.c are not static, yet they don't have a proper + migration prefix. Add such prefix. + + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c, migration/savevm.c + context changes in migrate_prepare() and qemu_savevm_state() due + to missing commit aff3f6606d14 ("migration: Rename ram_counters + to mig_stats") + +Signed-off-by: Cédric Le Goater +--- + migration/migration.c | 6 +++--- + migration/migration.h | 4 ++-- + migration/savevm.c | 2 +- + migration/target.c | 8 ++++---- + 4 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 47ad6c43cb..5aa9e5dada 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1021,7 +1021,7 @@ static void fill_source_migration_info(MigrationInfo *info) + populate_time_info(info, s); + populate_ram_info(info, s); + populate_disk_info(info); +- populate_vfio_info(info); ++ migration_populate_vfio_info(info); + break; + case MIGRATION_STATUS_COLO: + info->has_status = true; +@@ -1030,7 +1030,7 @@ static void fill_source_migration_info(MigrationInfo *info) + case MIGRATION_STATUS_COMPLETED: + populate_time_info(info, s); + populate_ram_info(info, s); +- populate_vfio_info(info); ++ migration_populate_vfio_info(info); + break; + case MIGRATION_STATUS_FAILED: + info->has_status = true; +@@ -1638,7 +1638,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, 
bool blk_inc, + */ + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); +- reset_vfio_bytes_transferred(); ++ migration_reset_vfio_bytes_transferred(); + + return true; + } +diff --git a/migration/migration.h b/migration/migration.h +index dfec649af8..c5b98485e3 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -505,8 +505,8 @@ void migration_consume_urgent_request(void); + bool migration_rate_limit(void); + void migration_cancel(const Error *error); + +-void populate_vfio_info(MigrationInfo *info); +-void reset_vfio_bytes_transferred(void); ++void migration_populate_vfio_info(MigrationInfo *info); ++void migration_reset_vfio_bytes_transferred(void); + void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); + + #endif +diff --git a/migration/savevm.c b/migration/savevm.c +index 83088fc3f8..05db79bfad 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1620,7 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + migrate_init(ms); + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); +- reset_vfio_bytes_transferred(); ++ migration_reset_vfio_bytes_transferred(); + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +diff --git a/migration/target.c b/migration/target.c +index f39c9a8d88..a6ffa9a5ce 100644 +--- a/migration/target.c ++++ b/migration/target.c +@@ -15,7 +15,7 @@ + #endif + + #ifdef CONFIG_VFIO +-void populate_vfio_info(MigrationInfo *info) ++void migration_populate_vfio_info(MigrationInfo *info) + { + if (vfio_mig_active()) { + info->vfio = g_malloc0(sizeof(*info->vfio)); +@@ -23,16 +23,16 @@ void populate_vfio_info(MigrationInfo *info) + } + } + +-void reset_vfio_bytes_transferred(void) ++void migration_reset_vfio_bytes_transferred(void) + { + vfio_reset_bytes_transferred(); + } + #else +-void populate_vfio_info(MigrationInfo *info) ++void migration_populate_vfio_info(MigrationInfo 
*info) + { + } + +-void reset_vfio_bytes_transferred(void) ++void migration_reset_vfio_bytes_transferred(void) + { + } + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Add-switchover-ack-capability.patch b/SOURCES/kvm-migration-Add-switchover-ack-capability.patch new file mode 100644 index 0000000..399c9ed --- /dev/null +++ b/SOURCES/kvm-migration-Add-switchover-ack-capability.patch @@ -0,0 +1,162 @@ +From 8f89d3bc8f226cd038bf88b9fb3ef43b0fb33034 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 10/37] migration: Add switchover ack capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/28] 2f4ca020783bd617eca13b18289fce764279833b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 6574232fff6a +Author: Avihai Horon +Date: Wed Jun 21 14:11:54 2023 +0300 + + migration: Add switchover ack capability + + Migration downtime estimation is calculated based on bandwidth and + remaining migration data. This assumes that loading of migration data in + the destination takes a negligible amount of time and that downtime + depends only on network speed. + + While this may be true for RAM, it's not necessarily true for other + migrated devices. For example, loading the data of a VFIO device in the + destination might require from the device to allocate resources, prepare + internal data structures and so on. These operations can take a + significant amount of time which can increase migration downtime. + + This patch adds a new capability "switchover ack" that prevents the + source from stopping the VM and completing the migration until an ACK + is received from the destination that it's OK to do so. 
+ + This can be used by migrated devices in various ways to reduce downtime. + For example, a device can send initial precopy metadata to pre-allocate + resources in the destination and use this capability to make sure that + the pre-allocation is completed before the source VM is stopped, so it + will have full effect. + + This new capability relies on the return path capability to communicate + from the destination back to the source. + + The actual implementation of the capability will be added in the + following patches. + + Signed-off-by: Avihai Horon + Reviewed-by: Peter Xu + Acked-by: Markus Armbruster + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - qapi/migration.json + re-indent of @switchover-ack to avoid ../qapi/migration.json:482:1: + unexpected de-indent (expected at least 17 spaces) + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + qapi/migration.json | 14 +++++++++++++- + 3 files changed, 35 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index a76984276d..c3df6c6dde 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -182,6 +182,8 @@ Property migration_properties[] = { + DEFINE_PROP_MIG_CAP("x-zero-copy-send", + MIGRATION_CAPABILITY_ZERO_COPY_SEND), + #endif ++ DEFINE_PROP_MIG_CAP("x-switchover-ack", ++ MIGRATION_CAPABILITY_SWITCHOVER_ACK), + + DEFINE_PROP_END_OF_LIST(), + }; +@@ -305,6 +307,13 @@ bool migrate_return_path(void) + return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + ++bool migrate_switchover_ack(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK]; ++} ++ + bool migrate_validate_uuid(void) + { + MigrationState *s = migrate_get_current(); +@@ -532,6 +541,18 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + } + } + ++ if 
(new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK]) { ++ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) { ++ error_setg(errp, "Capability 'switchover-ack' requires capability " ++ "'return-path'"); ++ return false; ++ } ++ ++ /* Disable this capability until it's implemented */ ++ error_setg(errp, "'switchover-ack' is not implemented yet"); ++ return false; ++ } ++ + return true; + } + +diff --git a/migration/options.h b/migration/options.h +index 7b0f7245ad..0fc7be6869 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -47,6 +47,7 @@ bool migrate_postcopy_ram(void); + bool migrate_rdma_pin_all(void); + bool migrate_release_ram(void); + bool migrate_return_path(void); ++bool migrate_switchover_ack(void); + bool migrate_validate_uuid(void); + bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); +diff --git a/qapi/migration.json b/qapi/migration.json +index 2c35b7b9cf..b6a58347cc 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -478,6 +478,18 @@ + # should not affect the correctness of postcopy migration. + # (since 7.1) + # ++# @switchover-ack: If enabled, migration will not stop the source VM ++# and complete the migration until an ACK is received ++# from the destination that it's OK to do so. ++# Exactly when this ACK is sent depends on the ++# migrated devices that use this feature. For ++# example, a device can use it to make sure some of ++# its data is sent and loaded in the destination ++# before doing switchover. This can reduce downtime ++# if devices that support this capability are ++# present. 'return-path' capability must be enabled ++# to use it. (since 8.1) ++# + # Features: + # @unstable: Members @x-colo and @x-ignore-shared are experimental. 
+ # +@@ -492,7 +504,7 @@ + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, + 'validate-uuid', 'background-snapshot', +- 'zero-copy-send', 'postcopy-preempt'] } ++ 'zero-copy-send', 'postcopy-preempt', 'switchover-ack'] } + + ## + # @MigrationCapabilityStatus: +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch b/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch new file mode 100644 index 0000000..7c9748b --- /dev/null +++ b/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch @@ -0,0 +1,308 @@ +From e2c2910edf90186ca0d7d13c9943caa284e95ea9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 25 Apr 2023 21:15:14 -0400 +Subject: [PATCH 51/56] migration: Allow postcopy_ram_supported_by_host() to + report err +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [50/50] 08c44affc11c27ddf1aa7ce0dfacbaf5effb80cb (peterx/qemu-kvm) + +Instead of print it to STDERR, bring the error upwards so that it can be +reported via QMP responses. 
+ +E.g.: + +{ "execute": "migrate-set-capabilities" , + "arguments": { "capabilities": + [ { "capability": "postcopy-ram", "state": true } ] } } + +{ "error": + { "class": "GenericError", + "desc": "Postcopy is not supported: Host backend files need to be TMPFS + or HUGETLBFS only" } } + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 74c38cf7fd24c60e4f0a90585d17250478260877) +Signed-off-by: Peter Xu +--- + migration/options.c | 8 ++---- + migration/postcopy-ram.c | 60 +++++++++++++++++++++------------------- + migration/postcopy-ram.h | 3 +- + migration/savevm.c | 3 +- + 4 files changed, 39 insertions(+), 35 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 4701c75a4d..e51d667e14 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -302,6 +302,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + ++ ERRP_GUARD(); + #ifndef CONFIG_LIVE_BLOCK_MIGRATION + if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { + error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " +@@ -327,11 +328,8 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + */ + if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && + runstate_check(RUN_STATE_INMIGRATE) && +- !postcopy_ram_supported_by_host(mis)) { +- /* postcopy_ram_supported_by_host will have emitted a more +- * detailed message +- */ +- error_setg(errp, "Postcopy is not supported"); ++ !postcopy_ram_supported_by_host(mis, errp)) { ++ error_prepend(errp, "Postcopy is not supported: "); + return false; + } + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 0711500036..75aa276bb1 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -283,11 +283,13 @@ static bool request_ufd_features(int ufd, uint64_t features) + return true; + } + +-static bool ufd_check_and_apply(int ufd, 
MigrationIncomingState *mis) ++static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis, ++ Error **errp) + { + uint64_t asked_features = 0; + static uint64_t supported_features; + ++ ERRP_GUARD(); + /* + * it's not possible to + * request UFFD_API twice per one fd +@@ -295,7 +297,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + */ + if (!supported_features) { + if (!receive_ufd_features(&supported_features)) { +- error_report("%s failed", __func__); ++ error_setg(errp, "Userfault feature detection failed"); + return false; + } + } +@@ -317,8 +319,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + * userfault file descriptor + */ + if (!request_ufd_features(ufd, asked_features)) { +- error_report("%s failed: features %" PRIu64, __func__, +- asked_features); ++ error_setg(errp, "Failed features %" PRIu64, asked_features); + return false; + } + +@@ -329,7 +330,8 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; + #endif + if (!have_hp) { +- error_report("Userfault on this host does not support huge pages"); ++ error_setg(errp, ++ "Userfault on this host does not support huge pages"); + return false; + } + } +@@ -338,7 +340,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + + /* Callback from postcopy_ram_supported_by_host block iterator. 
+ */ +-static int test_ramblock_postcopiable(RAMBlock *rb) ++static int test_ramblock_postcopiable(RAMBlock *rb, Error **errp) + { + const char *block_name = qemu_ram_get_idstr(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); +@@ -346,16 +348,18 @@ static int test_ramblock_postcopiable(RAMBlock *rb) + QemuFsType fs; + + if (length % pagesize) { +- error_report("Postcopy requires RAM blocks to be a page size multiple," +- " block %s is 0x" RAM_ADDR_FMT " bytes with a " +- "page size of 0x%zx", block_name, length, pagesize); ++ error_setg(errp, ++ "Postcopy requires RAM blocks to be a page size multiple," ++ " block %s is 0x" RAM_ADDR_FMT " bytes with a " ++ "page size of 0x%zx", block_name, length, pagesize); + return 1; + } + + if (rb->fd >= 0) { + fs = qemu_fd_getfs(rb->fd); + if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { +- error_report("Host backend files need to be TMPFS or HUGETLBFS only"); ++ error_setg(errp, ++ "Host backend files need to be TMPFS or HUGETLBFS only"); + return 1; + } + } +@@ -368,7 +372,7 @@ static int test_ramblock_postcopiable(RAMBlock *rb) + * normally fine since if the postcopy succeeds it gets turned back on at the + * end. 
+ */ +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) + { + long pagesize = qemu_real_host_page_size(); + int ufd = -1; +@@ -377,29 +381,27 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + struct uffdio_register reg_struct; + struct uffdio_range range_struct; + uint64_t feature_mask; +- Error *local_err = NULL; + RAMBlock *block; + ++ ERRP_GUARD(); + if (qemu_target_page_size() > pagesize) { +- error_report("Target page size bigger than host page size"); ++ error_setg(errp, "Target page size bigger than host page size"); + goto out; + } + + ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { +- error_report("%s: userfaultfd not available: %s", __func__, +- strerror(errno)); ++ error_setg(errp, "Userfaultfd not available: %s", strerror(errno)); + goto out; + } + + /* Give devices a chance to object */ +- if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) { +- error_report_err(local_err); ++ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, errp)) { + goto out; + } + + /* Version and features check */ +- if (!ufd_check_and_apply(ufd, mis)) { ++ if (!ufd_check_and_apply(ufd, mis, errp)) { + goto out; + } + +@@ -417,7 +419,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + * affect in reality, or we can revisit. + */ + RAMBLOCK_FOREACH(block) { +- if (test_ramblock_postcopiable(block)) { ++ if (test_ramblock_postcopiable(block, errp)) { + goto out; + } + } +@@ -427,7 +429,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + * it was enabled. 
+ */ + if (munlockall()) { +- error_report("%s: munlockall: %s", __func__, strerror(errno)); ++ error_setg(errp, "munlockall() failed: %s", strerror(errno)); + goto out; + } + +@@ -439,8 +441,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (testarea == MAP_FAILED) { +- error_report("%s: Failed to map test area: %s", __func__, +- strerror(errno)); ++ error_setg(errp, "Failed to map test area: %s", strerror(errno)); + goto out; + } + g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize)); +@@ -450,14 +451,14 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; + + if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) { +- error_report("%s userfault register: %s", __func__, strerror(errno)); ++ error_setg(errp, "UFFDIO_REGISTER failed: %s", strerror(errno)); + goto out; + } + + range_struct.start = (uintptr_t)testarea; + range_struct.len = pagesize; + if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { +- error_report("%s userfault unregister: %s", __func__, strerror(errno)); ++ error_setg(errp, "UFFDIO_UNREGISTER failed: %s", strerror(errno)); + goto out; + } + +@@ -465,8 +466,8 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + (__u64)1 << _UFFDIO_COPY | + (__u64)1 << _UFFDIO_ZEROPAGE; + if ((reg_struct.ioctls & feature_mask) != feature_mask) { +- error_report("Missing userfault map features: %" PRIx64, +- (uint64_t)(~reg_struct.ioctls & feature_mask)); ++ error_setg(errp, "Missing userfault map features: %" PRIx64, ++ (uint64_t)(~reg_struct.ioctls & feature_mask)); + goto out; + } + +@@ -1188,6 +1189,8 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) + + int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + { ++ Error *local_err = NULL; ++ + /* Open the fd for the kernel to give us userfaults */ + mis->userfault_fd = uffd_open(O_CLOEXEC |
O_NONBLOCK); + if (mis->userfault_fd == -1) { +@@ -1200,7 +1203,8 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + * Although the host check already tested the API, we need to + * do the check again as an ABI handshake on the new fd. + */ +- if (!ufd_check_and_apply(mis->userfault_fd, mis)) { ++ if (!ufd_check_and_apply(mis->userfault_fd, mis, &local_err)) { ++ error_report_err(local_err); + return -1; + } + +@@ -1360,7 +1364,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) + { + } + +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) + { + error_report("%s: No OS support", __func__); + return false; +diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h +index b4867a32d5..442ab89752 100644 +--- a/migration/postcopy-ram.h ++++ b/migration/postcopy-ram.h +@@ -14,7 +14,8 @@ + #define QEMU_POSTCOPY_RAM_H + + /* Return true if the host supports everything we need to do postcopy-ram */ +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, ++ Error **errp); + + /* + * Make all of RAM sensitive to accesses to areas that haven't yet been written +diff --git a/migration/savevm.c b/migration/savevm.c +index 9671211339..211eff3a8b 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1753,7 +1753,8 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + return -EINVAL; + } + +- if (!postcopy_ram_supported_by_host(mis)) { ++ if (!postcopy_ram_supported_by_host(mis, &local_err)) { ++ error_report_err(local_err); + postcopy_state_set(POSTCOPY_INCOMING_NONE); + return -1; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch b/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch new file mode 100644 index 0000000..d1620f0 --- /dev/null +++ 
b/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch @@ -0,0 +1,111 @@ +From 3691bb5f956e3c60dbf6de183011b31dbc7a7801 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 May 2023 15:52:12 -0500 +Subject: [PATCH 01/56] migration: Attempt disk reactivation in more failure + scenarios + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Acked-by: Kevin Wolf +RH-Commit: [1/1] 5999b747b314641259d3b8809033b057805eed3f (ebblake/centos-qemu-kvm) + +Commit fe904ea824 added a fail_inactivate label, which tries to +reactivate disks on the source after a failure while s->state == +MIGRATION_STATUS_ACTIVE, but didn't actually use the label if +qemu_savevm_state_complete_precopy() failed. This failure to +reactivate is also present in commit 6039dd5b1c (also covering the new +s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring +s->block_inactive is set more reliably). + +Consolidate the two labels back into one - no matter HOW migration is +failed, if there is any chance we can reach vm_start() after having +attempted inactivation, it is essential that we have tried to restart +disks before then. This also makes the cleanup more like +migrate_fd_cancel(). 
+ +Suggested-by: Kevin Wolf +Signed-off-by: Eric Blake +Message-Id: <20230502205212.134680-1-eblake@redhat.com> +Acked-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 6dab4c93ecfae48e2e67b984d1032c1e988d3005) +[eblake: downstream migrate_colo() => migrate_colo_enabled()] +Signed-off-by: Eric Blake +--- + migration/migration.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 08007cef4e..99f86bd6c2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3443,6 +3443,11 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { ++ /* ++ * Inactivate disks except in COLO, and track that we ++ * have done so in order to remember to reactivate ++ * them if migration fails or is cancelled. ++ */ + s->block_inactive = !migrate_colo_enabled(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, +@@ -3487,13 +3492,13 @@ static void migration_completion(MigrationState *s) + rp_error = await_return_path_close_on_source(s); + trace_migration_return_path_end_after(rp_error); + if (rp_error) { +- goto fail_invalidate; ++ goto fail; + } + } + + if (qemu_file_get_error(s->to_dst_file)) { + trace_migration_completion_file_err(); +- goto fail_invalidate; ++ goto fail; + } + + if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { +@@ -3507,26 +3512,25 @@ static void migration_completion(MigrationState *s) + + return; + +-fail_invalidate: +- /* If not doing postcopy, vm_start() will be called: let's regain +- * control on images. 
+- */ +- if (s->state == MIGRATION_STATUS_ACTIVE || +- s->state == MIGRATION_STATUS_DEVICE) { ++fail: ++ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || ++ s->state == MIGRATION_STATUS_DEVICE)) { ++ /* ++ * If not doing postcopy, vm_start() will be called: let's ++ * regain control on images. ++ */ + Error *local_err = NULL; + + qemu_mutex_lock_iothread(); + bdrv_activate_all(&local_err); + if (local_err) { + error_report_err(local_err); +- s->block_inactive = true; + } else { + s->block_inactive = false; + } + qemu_mutex_unlock_iothread(); + } + +-fail: + migrate_set_state(&s->state, current_active_state, + MIGRATION_STATUS_FAILED); + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_cap_set.patch b/SOURCES/kvm-migration-Create-migrate_cap_set.patch new file mode 100644 index 0000000..33268bb --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_cap_set.patch @@ -0,0 +1,93 @@ +From d772464e9a51a085e10864b2dc7ffd49991fc23b Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 21:02:42 +0100 +Subject: [PATCH 22/56] migration: Create migrate_cap_set() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [21/50] 5b12f04013cf2d374a869134bb67c938c789e24d (peterx/qemu-kvm) + +And remove the convoluted use of qmp_migrate_set_capabilities() to +enable disable MIGRATION_CAPABILITY_BLOCK. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9eb1109cfba5415dd0b0cb82e80fc5e42fe861b7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 34 ++++++++++++++++------------------ + 1 file changed, 16 insertions(+), 18 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index b745d829a4..18058fb597 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1912,25 +1912,24 @@ void migrate_set_state(int *state, int old_state, int new_state) + } + } + +-static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, +- bool state) ++static bool migrate_cap_set(int cap, bool value, Error **errp) + { +- MigrationCapabilityStatus *cap; +- +- cap = g_new0(MigrationCapabilityStatus, 1); +- cap->capability = index; +- cap->state = state; ++ MigrationState *s = migrate_get_current(); ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; + +- return cap; +-} ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return false; ++ } + +-void migrate_set_block_enabled(bool value, Error **errp) +-{ +- MigrationCapabilityStatusList *cap = NULL; ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ new_caps[cap] = value; + +- QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); +- qmp_migrate_set_capabilities(cap, errp); +- qapi_free_MigrationCapabilityStatusList(cap); ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return false; ++ } ++ s->capabilities[cap] = value; ++ return true; + } + + static void migrate_set_block_incremental(MigrationState *s, bool value) +@@ -1942,7 +1941,7 @@ static void block_cleanup_parameters(MigrationState *s) + { + if (s->must_remove_block_options) { + /* setting to false can never fail */ +- migrate_set_block_enabled(false, &error_abort); ++ migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort); + migrate_set_block_incremental(s, false); + s->must_remove_block_options = false; + 
} +@@ -2429,8 +2428,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + "current migration capabilities"); + return false; + } +- migrate_set_block_enabled(true, &local_err); +- if (local_err) { ++ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) { + error_propagate(errp, local_err); + return false; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch b/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch new file mode 100644 index 0000000..408d258 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch @@ -0,0 +1,84 @@ +From a17bee3c8ab48daa471ec53bed0e2cb0bb41fc76 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:04:55 +0100 +Subject: [PATCH 41/56] migration: Create migrate_checkpoint_delay() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [40/50] b972d3f12e49dc27aa78eb723ca6d0fac4d174d8 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit f94a858fa3e72ba954a338c01ae9fecc15fcce5c) +Signed-off-by: Peter Xu +--- + migration/colo.c | 5 ++--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + 3 files changed, 12 insertions(+), 3 deletions(-) + +diff --git a/migration/colo.c b/migration/colo.c +index 93b78c9270..07bfa21fea 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -576,7 +576,7 @@ static void colo_process_checkpoint(MigrationState *s) + trace_colo_vm_state_change("stop", "run"); + + timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + +- s->parameters.x_checkpoint_delay); ++ migrate_checkpoint_delay()); + + while (s->state == MIGRATION_STATUS_COLO) { + if 
(failover_get_state() != FAILOVER_STATUS_NONE) { +@@ -651,8 +651,7 @@ void colo_checkpoint_notify(void *opaque) + + qemu_event_set(&s->colo_checkpoint_event); + s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); +- next_notify_time = s->colo_checkpoint_time + +- s->parameters.x_checkpoint_delay; ++ next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); + timer_mod(s->colo_delay_timer, next_notify_time); + } + +diff --git a/migration/options.c b/migration/options.c +index b9f3815f7e..0e102e5700 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -472,6 +472,15 @@ bool migrate_block_incremental(void) + return s->parameters.block_incremental; + } + ++uint32_t migrate_checkpoint_delay(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.x_checkpoint_delay; ++} ++ + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index aa54443353..adc2879bbb 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -46,6 +46,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); + /* parameters */ + + bool migrate_block_incremental(void); ++uint32_t migrate_checkpoint_delay(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch new file mode 100644 index 0000000..65bad3c --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch @@ -0,0 +1,75 @@ +From 7ff430e011780dad00e5ebaad0318c5fa3aec102 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:20:49 +0100 +Subject: [PATCH 45/56] migration: Create migrate_cpu_throttle_increment() + function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [44/50] aec990a106a0347b265f5c056a516e0b91e8183c (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9605c2ac282c565bb00b5f344217161bef29eff8) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index f7fb6999f7..31435d2b45 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) + return s->parameters.compress_wait_thread; + } + ++uint8_t migrate_cpu_throttle_increment(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_increment; ++} ++ + uint8_t migrate_cpu_throttle_initial(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index fd8b91d767..49b29bdafd 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); ++uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5e855d5c22..5645745a42 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -713,7 +713,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + { + MigrationState *s = migrate_get_current(); + uint64_t pct_initial = migrate_cpu_throttle_initial(); +- uint64_t pct_increment = s->parameters.cpu_throttle_increment; ++ uint64_t 
pct_increment = migrate_cpu_throttle_increment(); + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; + int pct_max = migrate_max_cpu_throttle(); + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch new file mode 100644 index 0000000..aab2013 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch @@ -0,0 +1,75 @@ +From fdc2f14bfb3ef8897310a7db63287a9bab1fb858 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:22:44 +0100 +Subject: [PATCH 44/56] migration: Create migrate_cpu_throttle_initial() to + option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [43/50] e0e0db7218f28aefd4bd022edbaec236e2030cb1 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 2a8ec38082f8098f2693bb3632175453c0c84a51) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index 418aafac64..f7fb6999f7 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) + return s->parameters.compress_wait_thread; + } + ++uint8_t migrate_cpu_throttle_initial(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_initial; ++} ++ + int migrate_decompress_threads(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 72b1a320b7..fd8b91d767 100644 +--- a/migration/options.h ++++ 
b/migration/options.h +@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); ++uint8_t migrate_cpu_throttle_initial(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5c786513ef..5e855d5c22 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -712,7 +712,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t bytes_dirty_threshold) + { + MigrationState *s = migrate_get_current(); +- uint64_t pct_initial = s->parameters.cpu_throttle_initial; ++ uint64_t pct_initial = migrate_cpu_throttle_initial(); + uint64_t pct_increment = s->parameters.cpu_throttle_increment; + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; + int pct_max = migrate_max_cpu_throttle(); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch new file mode 100644 index 0000000..e36f003 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch @@ -0,0 +1,78 @@ +From b88c51c4b02639e28da73143b1da7bd3d6706ce5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:29:51 +0100 +Subject: [PATCH 46/56] migration: Create migrate_cpu_throttle_tailslow() + function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [45/50] e93e96392405c60f75abbf288e4fddb191bbc996 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 
873f674c559e3162a6e6e92994301d400c5cc873) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 3 +-- + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 31435d2b45..615534c151 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -527,6 +527,15 @@ uint8_t migrate_cpu_throttle_initial(void) + return s->parameters.cpu_throttle_initial; + } + ++bool migrate_cpu_throttle_tailslow(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_tailslow; ++} ++ + int migrate_decompress_threads(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 49b29bdafd..99f6bbd7a1 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -52,6 +52,7 @@ int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); ++bool migrate_cpu_throttle_tailslow(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5645745a42..01356f60a4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -711,10 +711,9 @@ static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f, + static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t bytes_dirty_threshold) + { +- MigrationState *s = migrate_get_current(); + uint64_t pct_initial = migrate_cpu_throttle_initial(); + uint64_t pct_increment = migrate_cpu_throttle_increment(); +- bool pct_tailslow = s->parameters.cpu_throttle_tailslow; ++ bool pct_tailslow = migrate_cpu_throttle_tailslow(); + int pct_max = migrate_max_cpu_throttle(); + + uint64_t throttle_now = cpu_throttle_get_percentage(); +-- +2.39.1 + diff --git 
a/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch b/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch new file mode 100644 index 0000000..ba1d34c --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch @@ -0,0 +1,232 @@ +From b6228b3122f5c1f220f92042277ab1bfbb5ba086 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 11:00:12 +0100 +Subject: [PATCH 48/56] migration: Create migrate_max_bandwidth() function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [47/50] 3874656f70cb9c2a30f4d63e146539480d422326 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9c894df3a37d675652390f7dbbe2f65b7bad7efa) +Signed-off-by: Peter Xu +--- + migration/migration.c | 70 +------------------------------------- + migration/options.c | 79 +++++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 1 + + 3 files changed, 81 insertions(+), 69 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 46a5ea4d42..c2e109329d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -886,74 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) + migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); + } + +-MigrationParameters *qmp_query_migrate_parameters(Error **errp) +-{ +- MigrationParameters *params; +- MigrationState *s = migrate_get_current(); +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- params = g_malloc0(sizeof(*params)); +- params->has_compress_level = true; +- params->compress_level = s->parameters.compress_level; +- params->has_compress_threads = true; +- 
params->compress_threads = s->parameters.compress_threads; +- params->has_compress_wait_thread = true; +- params->compress_wait_thread = s->parameters.compress_wait_thread; +- params->has_decompress_threads = true; +- params->decompress_threads = s->parameters.decompress_threads; +- params->has_throttle_trigger_threshold = true; +- params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; +- params->has_cpu_throttle_initial = true; +- params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; +- params->has_cpu_throttle_increment = true; +- params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; +- params->has_cpu_throttle_tailslow = true; +- params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; +- params->tls_creds = g_strdup(s->parameters.tls_creds); +- params->tls_hostname = g_strdup(s->parameters.tls_hostname); +- params->tls_authz = g_strdup(s->parameters.tls_authz ? +- s->parameters.tls_authz : ""); +- params->has_max_bandwidth = true; +- params->max_bandwidth = s->parameters.max_bandwidth; +- params->has_downtime_limit = true; +- params->downtime_limit = s->parameters.downtime_limit; +- params->has_x_checkpoint_delay = true; +- params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; +- params->has_block_incremental = true; +- params->block_incremental = s->parameters.block_incremental; +- params->has_multifd_channels = true; +- params->multifd_channels = s->parameters.multifd_channels; +- params->has_multifd_compression = true; +- params->multifd_compression = s->parameters.multifd_compression; +- params->has_multifd_zlib_level = true; +- params->multifd_zlib_level = s->parameters.multifd_zlib_level; +- params->has_multifd_zstd_level = true; +- params->multifd_zstd_level = s->parameters.multifd_zstd_level; +- params->has_xbzrle_cache_size = true; +- params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; +- params->has_max_postcopy_bandwidth = true; +- params->max_postcopy_bandwidth = 
s->parameters.max_postcopy_bandwidth; +- params->has_max_cpu_throttle = true; +- params->max_cpu_throttle = s->parameters.max_cpu_throttle; +- params->has_announce_initial = true; +- params->announce_initial = s->parameters.announce_initial; +- params->has_announce_max = true; +- params->announce_max = s->parameters.announce_max; +- params->has_announce_rounds = true; +- params->announce_rounds = s->parameters.announce_rounds; +- params->has_announce_step = true; +- params->announce_step = s->parameters.announce_step; +- +- if (s->parameters.has_block_bitmap_mapping) { +- params->has_block_bitmap_mapping = true; +- params->block_bitmap_mapping = +- QAPI_CLONE(BitmapMigrationNodeAliasList, +- s->parameters.block_bitmap_mapping); +- } +- +- return params; +-} +- + /* + * Return true if we're already in the middle of a migration + * (i.e. any of the active or setup states) +@@ -3775,7 +3707,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + XFER_LIMIT_RATIO; + } else { + /* This is a fresh new migration */ +- rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; ++ rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO; + + /* Notify before starting migration thread */ + notifier_list_notify(&migration_state_notifiers, s); +diff --git a/migration/options.c b/migration/options.c +index 8bd2d949ae..8e8753d9be 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,8 +12,10 @@ + */ + + #include "qemu/osdep.h" ++#include "qapi/clone-visitor.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" ++#include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" + #include "migration/misc.h" +@@ -562,6 +564,15 @@ uint8_t migrate_max_cpu_throttle(void) + return s->parameters.max_cpu_throttle; + } + ++uint64_t migrate_max_bandwidth(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_bandwidth; ++} ++ + int64_t 
migrate_max_postcopy_bandwidth(void) + { + MigrationState *s; +@@ -641,3 +652,71 @@ AnnounceParameters *migrate_announce_params(void) + + return &ap; + } ++ ++MigrationParameters *qmp_query_migrate_parameters(Error **errp) ++{ ++ MigrationParameters *params; ++ MigrationState *s = migrate_get_current(); ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ params = g_malloc0(sizeof(*params)); ++ params->has_compress_level = true; ++ params->compress_level = s->parameters.compress_level; ++ params->has_compress_threads = true; ++ params->compress_threads = s->parameters.compress_threads; ++ params->has_compress_wait_thread = true; ++ params->compress_wait_thread = s->parameters.compress_wait_thread; ++ params->has_decompress_threads = true; ++ params->decompress_threads = s->parameters.decompress_threads; ++ params->has_throttle_trigger_threshold = true; ++ params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; ++ params->has_cpu_throttle_initial = true; ++ params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; ++ params->has_cpu_throttle_increment = true; ++ params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; ++ params->has_cpu_throttle_tailslow = true; ++ params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; ++ params->tls_creds = g_strdup(s->parameters.tls_creds); ++ params->tls_hostname = g_strdup(s->parameters.tls_hostname); ++ params->tls_authz = g_strdup(s->parameters.tls_authz ?
++ s->parameters.tls_authz : ""); ++ params->has_max_bandwidth = true; ++ params->max_bandwidth = s->parameters.max_bandwidth; ++ params->has_downtime_limit = true; ++ params->downtime_limit = s->parameters.downtime_limit; ++ params->has_x_checkpoint_delay = true; ++ params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; ++ params->has_block_incremental = true; ++ params->block_incremental = s->parameters.block_incremental; ++ params->has_multifd_channels = true; ++ params->multifd_channels = s->parameters.multifd_channels; ++ params->has_multifd_compression = true; ++ params->multifd_compression = s->parameters.multifd_compression; ++ params->has_multifd_zlib_level = true; ++ params->multifd_zlib_level = s->parameters.multifd_zlib_level; ++ params->has_multifd_zstd_level = true; ++ params->multifd_zstd_level = s->parameters.multifd_zstd_level; ++ params->has_xbzrle_cache_size = true; ++ params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; ++ params->has_max_postcopy_bandwidth = true; ++ params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; ++ params->has_max_cpu_throttle = true; ++ params->max_cpu_throttle = s->parameters.max_cpu_throttle; ++ params->has_announce_initial = true; ++ params->announce_initial = s->parameters.announce_initial; ++ params->has_announce_max = true; ++ params->announce_max = s->parameters.announce_max; ++ params->has_announce_rounds = true; ++ params->announce_rounds = s->parameters.announce_rounds; ++ params->has_announce_step = true; ++ params->announce_step = s->parameters.announce_step; ++ ++ if (s->parameters.has_block_bitmap_mapping) { ++ params->has_block_bitmap_mapping = true; ++ params->block_bitmap_mapping = ++ QAPI_CLONE(BitmapMigrationNodeAliasList, ++ s->parameters.block_bitmap_mapping); ++ } ++ ++ return params; ++} +diff --git a/migration/options.h b/migration/options.h +index 093bc907a1..1b78fa9f3d 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -64,6 +64,7 @@ uint8_t 
migrate_cpu_throttle_initial(void); + bool migrate_cpu_throttle_tailslow(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); ++uint64_t migrate_max_bandwidth(void); + int64_t migrate_max_postcopy_bandwidth(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch b/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch new file mode 100644 index 0000000..6628b80 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch @@ -0,0 +1,88 @@ +From f0d4e34b00f66d2336b755a34a1ba226571641c4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:13:01 +0100 +Subject: [PATCH 42/56] migration: Create migrate_max_cpu_throttle() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [41/50] fc7537c06d8e1f53d7bb552661f6ddb0133a978d (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 24155bd0520035d5148c0af5b925932c4d8064a8) +Signed-off-by: Peter Xu +--- + migration/migration.h | 2 -- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 4 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.h b/migration/migration.h +index 86051af132..3ae938b19c 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -451,8 +451,6 @@ bool migrate_postcopy(void); + + int migrate_use_tls(void); + +-int migrate_max_cpu_throttle(void); +- + uint64_t ram_get_total_transferred_pages(void); + + /* Sending on the return path - generic and then for each message type */ +diff --git 
a/migration/options.c b/migration/options.c +index 0e102e5700..2cb04fbbd1 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -517,6 +517,15 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + ++uint8_t migrate_max_cpu_throttle(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_cpu_throttle; ++} ++ + int64_t migrate_max_postcopy_bandwidth(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index adc2879bbb..72b1a320b7 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -51,6 +51,7 @@ int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); ++uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); +diff --git a/migration/ram.c b/migration/ram.c +index e82cee97c3..5c786513ef 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -715,7 +715,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t pct_initial = s->parameters.cpu_throttle_initial; + uint64_t pct_increment = s->parameters.cpu_throttle_increment; + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; +- int pct_max = s->parameters.max_cpu_throttle; ++ int pct_max = migrate_max_cpu_throttle(); + + uint64_t throttle_now = cpu_throttle_get_percentage(); + uint64_t cpu_now, cpu_ideal, throttle_inc; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch b/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch new file mode 100644 index 0000000..c7799f1 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch @@ -0,0 +1,95 @@ +From e4ef0f2cee6cdf2cf4bd225ac9e610f41d66dfcb Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 
22:41:55 +0100 +Subject: [PATCH 32/56] migration: Create migrate_rdma_pin_all() function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [31/50] 206d96d47d9ee73ddc89dd01186560bf62ea5295 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy + +--- + +Fixed missing space after comma (fabiano) + +(cherry picked from commit 17cba690cdd42108369fafe6b07bff09872fbea6) +Signed-off-by: Peter Xu +--- + migration/options.c | 7 +++++++ + migration/options.h | 1 + + migration/rdma.c | 6 +++--- + 3 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 2003e413da..9c9b8e5863 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -138,6 +138,13 @@ bool migrate_postcopy_ram(void) + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } + ++bool migrate_rdma_pin_all(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]; ++} ++ + bool migrate_release_ram(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 316efd1063..25c002b37a 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -30,6 +30,7 @@ bool migrate_pause_before_switchover(void); + bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); ++bool migrate_rdma_pin_all(void); + bool migrate_release_ram(void); + bool migrate_return_path(void); + bool migrate_validate_uuid(void); +diff --git a/migration/rdma.c b/migration/rdma.c +index bf55e2f163..0af5e944f0 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -35,6 +35,7 @@ + #include + #include "trace.h" + #include 
"qom/object.h" ++#include "options.h" + #include + + /* +@@ -4178,8 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, + goto err; + } + +- ret = qemu_rdma_source_init(rdma, +- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ ret = qemu_rdma_source_init(rdma, migrate_rdma_pin_all(), errp); + + if (ret) { + goto err; +@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma_return_path, +- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ migrate_rdma_pin_all(), errp); + + if (ret) { + goto return_path_err; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch b/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch new file mode 100644 index 0000000..5fc1072 --- /dev/null +++ b/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch @@ -0,0 +1,75 @@ +From 27862b9d31da6447b60f185cdad95764018c6bc6 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:59:13 +0100 +Subject: [PATCH 40/56] migration: Create migrate_throttle_trigger_threshold() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [39/50] b8af9080c49be3d38bd2784d61289be89c03db3e (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 6499efdb16e5c1288b4c8390d3bf68b313329b8b) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 3 +-- + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 2b6d88b4b9..b9f3815f7e 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -554,6 +554,15 @@ int 
migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + ++uint8_t migrate_throttle_trigger_threshold(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.throttle_trigger_threshold; ++} ++ + uint64_t migrate_xbzrle_cache_size(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 96d5a8e6e4..aa54443353 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -55,6 +55,7 @@ int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); ++uint8_t migrate_throttle_trigger_threshold(void); + uint64_t migrate_xbzrle_cache_size(void); + + #endif +diff --git a/migration/ram.c b/migration/ram.c +index 4576d0d849..e82cee97c3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1178,8 +1178,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + + static void migration_trigger_throttle(RAMState *rs) + { +- MigrationState *s = migrate_get_current(); +- uint64_t threshold = s->parameters.throttle_trigger_threshold; ++ uint64_t threshold = migrate_throttle_trigger_threshold(); + uint64_t bytes_xfer_period = + stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; + uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Create-options.c.patch b/SOURCES/kvm-migration-Create-options.c.patch new file mode 100644 index 0000000..ea60202 --- /dev/null +++ b/SOURCES/kvm-migration-Create-options.c.patch @@ -0,0 +1,524 @@ +From 282634a835f4711c8b501dd76c344058bc399fbd Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 21:18:45 +0100 +Subject: [PATCH 23/56] migration: Create options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for 
postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [22/50] 10c9be528b9fcfae93f1a12fcd09db1a69e58f64 (peterx/qemu-kvm) + +We move there all capabilities helpers from migration.c. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert + +--- + +Following David advise: +- looked through the history, capabilities are newer than 2012, so we + can remove that bit of the header. +- This part is posterior to Anthony. + Original Author is Orit. Once there, + I put myself. Peter Xu also did quite a bit of work here. + Anyone else wants/needs to be there? I didn't search too hard + because nobody asked before to be added. + +What do you think? + +(cherry picked from commit 1f0776f1c03312aad5d6a5f98871240bc3af01e5) +Signed-off-by: Peter Xu +--- + hw/virtio/virtio-balloon.c | 1 + + migration/block-dirty-bitmap.c | 1 + + migration/block.c | 1 + + migration/colo.c | 1 + + migration/meson.build | 1 + + migration/migration.c | 109 +---------------------------- + migration/migration.h | 12 ---- + migration/options.c | 124 +++++++++++++++++++++++++++++++++ + migration/options.h | 32 +++++++++ + migration/postcopy-ram.c | 1 + + migration/ram.c | 1 + + migration/savevm.c | 1 + + migration/socket.c | 1 + + 13 files changed, 166 insertions(+), 120 deletions(-) + create mode 100644 migration/options.c + create mode 100644 migration/options.h + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 746f07c4d2..43092aa634 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -32,6 +32,7 @@ + #include "qemu/error-report.h" + #include "migration/misc.h" + #include "migration/migration.h" ++#include "migration/options.h" + + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio-access.h" +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index fe73aa94b1..a6ffae0002 100644 +--- 
a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -79,6 +79,7 @@ + #include "qapi/qapi-visit-migration.h" + #include "qapi/clone-visitor.h" + #include "trace.h" ++#include "options.h" + + #define CHUNK_SIZE (1 << 10) + +diff --git a/migration/block.c b/migration/block.c +index b2497bbd32..4b167fa5cf 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -28,6 +28,7 @@ + #include "migration/vmstate.h" + #include "sysemu/block-backend.h" + #include "trace.h" ++#include "options.h" + + #define BLK_MIG_BLOCK_SIZE (1ULL << 20) + #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS) +diff --git a/migration/colo.c b/migration/colo.c +index 0716e64689..93b78c9270 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -36,6 +36,7 @@ + #include "sysemu/cpus.h" + #include "sysemu/runstate.h" + #include "net/filter.h" ++#include "options.h" + + static bool vmstate_loading; + static Notifier packets_compare_notifier; +diff --git a/migration/meson.build b/migration/meson.build +index 0d1bb9f96e..480ff6854a 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -22,6 +22,7 @@ softmmu_ss.add(files( + 'migration.c', + 'multifd.c', + 'multifd-zlib.c', ++ 'options.c', + 'postcopy-ram.c', + 'savevm.c', + 'socket.c', +diff --git a/migration/migration.c b/migration/migration.c +index 18058fb597..66ea55be06 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -63,6 +63,7 @@ + #include "sysemu/cpus.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" ++#include "options.h" + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +@@ -357,15 +358,6 @@ static void migrate_generate_event(int new_state) + } + } + +-static bool migrate_late_block_activate(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; +-} +- + /* + * Send a message on the return channel back to the source + * of the 
migration. +@@ -2525,56 +2517,11 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-bool migrate_release_ram(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; +-} +- +-bool migrate_postcopy_ram(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; +-} +- + bool migrate_postcopy(void) + { + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-bool migrate_auto_converge(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; +-} +- +-bool migrate_zero_blocks(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; +-} +- +-bool migrate_postcopy_blocktime(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; +-} +- + bool migrate_use_compression(void) + { + MigrationState *s; +@@ -2620,33 +2567,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_dirty_bitmaps(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; +-} +- +-bool migrate_ignore_shared(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; +-} +- +-bool migrate_validate_uuid(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; +-} +- + bool migrate_use_events(void) + { + MigrationState *s; +@@ -2665,15 +2585,6 @@ bool migrate_use_multifd(void) + return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + +-bool migrate_pause_before_switchover(void) +-{ +- MigrationState *s; +- +- s 
= migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; +-} +- + int migrate_multifd_channels(void) + { + MigrationState *s; +@@ -2785,24 +2696,6 @@ bool migrate_use_block_incremental(void) + return s->parameters.block_incremental; + } + +-bool migrate_background_snapshot(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; +-} +- +-bool migrate_postcopy_preempt(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index 04e0860b4e..a25fed6ef0 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,16 +449,7 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-bool migrate_release_ram(void); +-bool migrate_postcopy_ram(void); +-bool migrate_zero_blocks(void); +-bool migrate_dirty_bitmaps(void); +-bool migrate_ignore_shared(void); +-bool migrate_validate_uuid(void); +- +-bool migrate_auto_converge(void); + bool migrate_use_multifd(void); +-bool migrate_pause_before_switchover(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); +@@ -487,9 +478,6 @@ int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); + bool migrate_use_events(void); +-bool migrate_postcopy_blocktime(void); +-bool migrate_background_snapshot(void); +-bool migrate_postcopy_preempt(void); + + /* Sending on the return path - generic and then for each message type */ + void migrate_send_rp_shut(MigrationIncomingState *mis, +diff --git a/migration/options.c b/migration/options.c +new file mode 100644 +index 0000000000..88a9a45913 +--- 
/dev/null ++++ b/migration/options.c +@@ -0,0 +1,124 @@ ++/* ++ * QEMU migration capabilities ++ * ++ * Copyright (c) 2012-2023 Red Hat Inc ++ * ++ * Authors: ++ * Orit Wasserman ++ * Juan Quintela ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "migration.h" ++#include "options.h" ++ ++bool migrate_auto_converge(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; ++} ++ ++bool migrate_background_snapshot(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; ++} ++ ++bool migrate_dirty_bitmaps(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; ++} ++ ++bool migrate_ignore_shared(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; ++} ++ ++bool migrate_late_block_activate(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; ++} ++ ++bool migrate_pause_before_switchover(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; ++} ++ ++bool migrate_postcopy_blocktime(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; ++} ++ ++bool migrate_postcopy_preempt(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; ++} ++ ++bool migrate_postcopy_ram(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; ++} ++ ++bool 
migrate_release_ram(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; ++} ++ ++bool migrate_validate_uuid(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; ++} ++ ++bool migrate_zero_blocks(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; ++} +diff --git a/migration/options.h b/migration/options.h +new file mode 100644 +index 0000000000..0dfa0af245 +--- /dev/null ++++ b/migration/options.h +@@ -0,0 +1,32 @@ ++/* ++ * QEMU migration capabilities ++ * ++ * Copyright (c) 2012-2023 Red Hat Inc ++ * ++ * Authors: ++ * Orit Wasserman ++ * Juan Quintela ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#ifndef QEMU_MIGRATION_OPTIONS_H ++#define QEMU_MIGRATION_OPTIONS_H ++ ++/* capabilities */ ++ ++bool migrate_auto_converge(void); ++bool migrate_background_snapshot(void); ++bool migrate_dirty_bitmaps(void); ++bool migrate_ignore_shared(void); ++bool migrate_late_block_activate(void); ++bool migrate_pause_before_switchover(void); ++bool migrate_postcopy_blocktime(void); ++bool migrate_postcopy_preempt(void); ++bool migrate_postcopy_ram(void); ++bool migrate_release_ram(void); ++bool migrate_validate_uuid(void); ++bool migrate_zero_blocks(void); ++ ++#endif +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index d7b48dd920..0711500036 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -37,6 +37,7 @@ + #include "tls.h" + #include "qemu/userfaultfd.h" + #include "qemu/mmap-alloc.h" ++#include "options.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +diff --git a/migration/ram.c b/migration/ram.c +index 229714045a..912ccd89fa 100644 +--- a/migration/ram.c ++++ 
b/migration/ram.c +@@ -57,6 +57,7 @@ + #include "qemu/iov.h" + #include "multifd.h" + #include "sysemu/runstate.h" ++#include "options.h" + + #include "hw/boards.h" /* for machine_dump_guest_core() */ + +diff --git a/migration/savevm.c b/migration/savevm.c +index 589ef926ab..ebcf571e37 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -67,6 +67,7 @@ + #include "qemu/yank.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" ++#include "options.h" + + const unsigned int postcopy_ram_discard_version; + +diff --git a/migration/socket.c b/migration/socket.c +index e6fdf3c5e1..ebf9ac41af 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -27,6 +27,7 @@ + #include "io/net-listener.h" + #include "trace.h" + #include "postcopy-ram.h" ++#include "options.h" + + struct SocketOutgoingArgs { + SocketAddress *saddr; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch b/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch new file mode 100644 index 0000000..e08e5df --- /dev/null +++ b/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch @@ -0,0 +1,56 @@ +From bbe565f7d3b7fe46971e020e9bd8e79dc9ffa69c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 12/37] migration: Enable switchover ack capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/28] c4a7d7d26a97181c9516d133a6610bfa5dcb1d16 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 538ef4fe2f72 +Author: Avihai Horon +Date: Wed Jun 21 14:11:56 2023 +0300 + + migration: Enable switchover ack capability + + Now that switchover ack logic has been implemented, enable the + capability. 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Juan Quintela + Reviewed-by: Peter Xu + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index c3df6c6dde..ccd7ef3907 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -547,10 +547,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + "'return-path'"); + return false; + } +- +- /* Disable this capability until it's implemented */ +- error_setg(errp, "'switchover-ack' is not implemented yet"); +- return false; + } + + return true; +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch b/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch new file mode 100644 index 0000000..26c8437 --- /dev/null +++ b/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch @@ -0,0 +1,116 @@ +From 2aac64623d8d2d06d248c1bcc71aa13572fc843c Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 14 Apr 2023 10:33:58 -0500 +Subject: [PATCH 1/2] migration: Handle block device inactivation failures + better + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [1/2] 5ae143c9234f6eee9fc5154944172bcd56975b36 (ebblake/centos-qemu-kvm) + +Consider what happens when performing a migration between two host +machines connected to an NFS server serving multiple block devices to +the guest, when the NFS server becomes unavailable. The migration +attempts to inactivate all block devices on the source (a necessary +step before the destination can take over); but if the NFS server is +non-responsive, the attempt to inactivate can itself fail. 
When that +happens, the destination fails to get the migrated guest (good, +because the source wasn't able to flush everything properly): + + (qemu) qemu-kvm: load of migration failed: Input/output error + +at which point, our only hope for the guest is for the source to take +back control. With the current code base, the host outputs a message, but then appears to resume: + + (qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1) + + (src qemu)info status + VM status: running + +but a second migration attempt now asserts: + + (src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed. + +Whether the guest is recoverable on the source after the first failure +is debatable, but what we do not want is to have qemu itself fail due +to an assertion. It looks like the problem is as follows: + +In migration.c:migration_completion(), the source sets 'inactivate' to +true (since COLO is not enabled), then tries +savevm.c:qemu_savevm_state_complete_precopy() with a request to +inactivate block devices. In turn, this calls +block.c:bdrv_inactivate_all(), which fails when flushing runs up +against the non-responsive NFS server. With savevm failing, we are +now left in a state where some, but not all, of the block devices have +been inactivated; but migration_completion() then jumps to 'fail' +rather than 'fail_invalidate' and skips an attempt to reclaim +those disks by calling bdrv_activate_all(). Even if we do attempt to +reclaim disks, we aren't taking note of failure there, either. 
+ +Thus, we have reached a state where the migration engine has forgotten +all state about whether a block device is inactive, because we did not +set s->block_inactive in enough places; so migration allows the source +to reach vm_start() and resume execution, violating the block layer +invariant that the guest CPUs should not be restarted while a device +is inactive. Note that the code in migration.c:migrate_fd_cancel() +will also try to reactivate all block devices if s->block_inactive was +set, but because we failed to set that flag after the first failure, +the source assumes it has reclaimed all devices, even though it still +has remaining inactivated devices and does not try again. Normally, +qmp_cont() will also try to reactivate all disks (or correctly fail if +the disks are not reclaimable because NFS is not yet back up), but the +auto-resumption of the source after a migration failure does not go +through qmp_cont(). And because we have left the block layer in an +inconsistent state with devices still inactivated, the later migration +attempt is hitting the assertion failure. + +Since it is important to not resume the source with inactive disks, +this patch marks s->block_inactive before attempting inactivation, +rather than after succeeding, in order to prevent any vm_start() until +it has successfully reactivated all devices. 
+ +See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982 + +Signed-off-by: Eric Blake +Reviewed-by: Juan Quintela +Acked-by: Lukas Straub +Tested-by: Lukas Straub +Signed-off-by: Juan Quintela +(cherry picked from commit 403d18ae384239876764bbfa111d6cc5dcb673d1) +--- + migration/migration.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index bda4789193..cb0d42c061 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3444,13 +3444,11 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { ++ s->block_inactive = inactivate; + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + inactivate); + } +- if (inactivate && ret >= 0) { +- s->block_inactive = true; +- } + } + qemu_mutex_unlock_iothread(); + +@@ -3522,6 +3520,7 @@ fail_invalidate: + bdrv_activate_all(&local_err); + if (local_err) { + error_report_err(local_err); ++ s->block_inactive = true; + } else { + s->block_inactive = false; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch b/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch new file mode 100644 index 0000000..49b9f12 --- /dev/null +++ b/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch @@ -0,0 +1,339 @@ +From 387c39f198d94f600be525e363edc7ca916dc261 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 11/37] migration: Implement switchover ack logic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/28] 853e1978f3b9f87942863bba894a0ed908bde6b1 (clegoate/qemu-kvm-c9s) + +Bugzilla: 
https://bugzilla.redhat.com/2192818 + +commit 1b4adb10f898 +Author: Avihai Horon +Date: Wed Jun 21 14:11:55 2023 +0300 + + migration: Implement switchover ack logic + + Implement switchover ack logic. This prevents the source from stopping + the VM and completing the migration until an ACK is received from the + destination that it's OK to do so. + + To achieve this, a new SaveVMHandlers handler switchover_ack_needed() + and a new return path message MIG_RP_MSG_SWITCHOVER_ACK are added. + + The switchover_ack_needed() handler is called during migration setup in + the destination to check if switchover ack is used by the migrated + device. + + When switchover is approved by all migrated devices in the destination + that support this capability, the MIG_RP_MSG_SWITCHOVER_ACK return path + message is sent to the source to notify it that it's OK to do + switchover. + + Signed-off-by: Avihai Horon + Reviewed-by: Peter Xu + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c + context changes due to commit f4584076fc31 ("migration: switch + from .vm_was_running to .vm_old_state") + +Signed-off-by: Cédric Le Goater +--- + include/migration/register.h | 2 ++ + migration/migration.c | 32 +++++++++++++++++++-- + migration/migration.h | 14 ++++++++++ + migration/savevm.c | 54 ++++++++++++++++++++++++++++++++++++ + migration/savevm.h | 1 + + migration/trace-events | 3 ++ + 6 files changed, 104 insertions(+), 2 deletions(-) + +diff --git a/include/migration/register.h b/include/migration/register.h +index a8dfd8fefd..90914f32f5 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -71,6 +71,8 @@ typedef struct SaveVMHandlers { + int (*load_cleanup)(void *opaque); + /* Called when postcopy migration wants to resume from failure */ + int (*resume_prepare)(MigrationState *s, void *opaque); ++ /* Checks if switchover ack should be used. 
Called only in dest */ ++ bool (*switchover_ack_needed)(void *opaque); + } SaveVMHandlers; + + int register_savevm_live(const char *idstr, +diff --git a/migration/migration.c b/migration/migration.c +index 1ac5f19bc2..9bf1caee6c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -76,6 +76,7 @@ enum mig_rp_message_type { + MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ + MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ + MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ ++ MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */ + + MIG_RP_MSG_MAX + }; +@@ -756,6 +757,11 @@ bool migration_has_all_channels(void) + return true; + } + ++int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) ++{ ++ return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL); ++} ++ + /* + * Send a 'SHUT' message on the return channel with the given value + * to indicate that we've finished with the RP. Non-0 value indicates +@@ -1415,6 +1421,7 @@ void migrate_init(MigrationState *s) + s->vm_was_running = false; + s->iteration_initial_bytes = 0; + s->threshold_size = 0; ++ s->switchover_acked = false; + } + + int migrate_add_blocker_internal(Error *reason, Error **errp) +@@ -1731,6 +1738,7 @@ static struct rp_cmd_args { + [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, + [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, + [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, ++ [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, + [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, + }; + +@@ -1969,6 +1977,11 @@ retry: + } + break; + ++ case MIG_RP_MSG_SWITCHOVER_ACK: ++ ms->switchover_acked = true; ++ trace_source_return_path_thread_switchover_acked(); ++ break; ++ + default: + break; + } +@@ -2720,6 +2733,20 @@ static void migration_update_counters(MigrationState *s, + bandwidth, s->threshold_size); + } + ++static bool 
migration_can_switchover(MigrationState *s) ++{ ++ if (!migrate_switchover_ack()) { ++ return true; ++ } ++ ++ /* No reason to wait for switchover ACK if VM is stopped */ ++ if (!runstate_is_running()) { ++ return true; ++ } ++ ++ return s->switchover_acked; ++} ++ + /* Migration thread iteration status */ + typedef enum { + MIG_ITERATE_RESUME, /* Resume current iteration */ +@@ -2735,6 +2762,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) + { + uint64_t must_precopy, can_postcopy; + bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; ++ bool can_switchover = migration_can_switchover(s); + + qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); + uint64_t pending_size = must_precopy + can_postcopy; +@@ -2747,14 +2775,14 @@ static MigIterateState migration_iteration_run(MigrationState *s) + trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy); + } + +- if (!pending_size || pending_size < s->threshold_size) { ++ if ((!pending_size || pending_size < s->threshold_size) && can_switchover) { + trace_migration_thread_low_pending(pending_size); + migration_completion(s); + return MIG_ITERATE_BREAK; + } + + /* Still a significant amount to transfer */ +- if (!in_postcopy && must_precopy <= s->threshold_size && ++ if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover && + qatomic_read(&s->start_postcopy)) { + if (postcopy_start(s)) { + error_report("%s: postcopy failed to start", __func__); +diff --git a/migration/migration.h b/migration/migration.h +index 2b71df8617..e9679f8029 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -204,6 +204,13 @@ struct MigrationIncomingState { + * contains valid information. + */ + QemuMutex page_request_mutex; ++ ++ /* ++ * Number of devices that have yet to approve switchover. When this reaches ++ * zero an ACK that it's OK to do switchover is sent to the source. No lock ++ * is needed as this field is updated serially. 
++ */ ++ unsigned int switchover_ack_pending_num; + }; + + MigrationIncomingState *migration_incoming_get_current(void); +@@ -421,6 +428,12 @@ struct MigrationState { + + /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */ + JSONWriter *vmdesc; ++ ++ /* ++ * Indicates whether an ACK from the destination that it's OK to do ++ * switchover has been received. ++ */ ++ bool switchover_acked; + }; + + void migrate_set_state(int *state, int old_state, int new_state); +@@ -461,6 +474,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, + void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, + char *block_name); + void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); ++int migrate_send_rp_switchover_ack(MigrationIncomingState *mis); + + void dirty_bitmap_mig_before_vm_start(void); + void dirty_bitmap_mig_cancel_outgoing(void); +diff --git a/migration/savevm.c b/migration/savevm.c +index 211eff3a8b..aff70e6263 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2358,6 +2358,21 @@ static int loadvm_process_command(QEMUFile *f) + error_report("CMD_OPEN_RETURN_PATH failed"); + return -1; + } ++ ++ /* ++ * Switchover ack is enabled but no device uses it, so send an ACK to ++ * source that it's OK to switchover. Do it here, after return path has ++ * been created. 
++ */ ++ if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) { ++ int ret = migrate_send_rp_switchover_ack(mis); ++ if (ret) { ++ error_report( ++ "Could not send switchover ack RP MSG, err %d (%s)", ret, ++ strerror(-ret)); ++ return ret; ++ } ++ } + break; + + case MIG_CMD_PING: +@@ -2584,6 +2599,23 @@ static int qemu_loadvm_state_header(QEMUFile *f) + return 0; + } + ++static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis) ++{ ++ SaveStateEntry *se; ++ ++ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { ++ if (!se->ops || !se->ops->switchover_ack_needed) { ++ continue; ++ } ++ ++ if (se->ops->switchover_ack_needed(se->opaque)) { ++ mis->switchover_ack_pending_num++; ++ } ++ } ++ ++ trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num); ++} ++ + static int qemu_loadvm_state_setup(QEMUFile *f) + { + SaveStateEntry *se; +@@ -2787,6 +2819,10 @@ int qemu_loadvm_state(QEMUFile *f) + return -EINVAL; + } + ++ if (migrate_switchover_ack()) { ++ qemu_loadvm_state_switchover_ack_needed(mis); ++ } ++ + cpu_synchronize_all_pre_loadvm(); + + ret = qemu_loadvm_state_main(f, mis); +@@ -2860,6 +2896,24 @@ int qemu_load_device_state(QEMUFile *f) + return 0; + } + ++int qemu_loadvm_approve_switchover(void) ++{ ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++ if (!mis->switchover_ack_pending_num) { ++ return -EINVAL; ++ } ++ ++ mis->switchover_ack_pending_num--; ++ trace_loadvm_approve_switchover(mis->switchover_ack_pending_num); ++ ++ if (mis->switchover_ack_pending_num) { ++ return 0; ++ } ++ ++ return migrate_send_rp_switchover_ack(mis); ++} ++ + bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + bool has_devices, strList *devices, Error **errp) + { +diff --git a/migration/savevm.h b/migration/savevm.h +index fb636735f0..e894bbc143 100644 +--- a/migration/savevm.h ++++ b/migration/savevm.h +@@ -65,6 +65,7 @@ int qemu_loadvm_state(QEMUFile *f); + void 
qemu_loadvm_state_cleanup(void); + int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); + int qemu_load_device_state(QEMUFile *f); ++int qemu_loadvm_approve_switchover(void); + int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + bool in_postcopy, bool inactivate_disks); + +diff --git a/migration/trace-events b/migration/trace-events +index 92161eeac5..cda807d271 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -7,6 +7,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u" + qemu_loadvm_state_post_main(int ret) "%d" + qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" + qemu_savevm_send_packaged(void) "" ++loadvm_state_switchover_ack_needed(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" + loadvm_state_setup(void) "" + loadvm_state_cleanup(void) "" + loadvm_handle_cmd_packaged(unsigned int length) "%u" +@@ -23,6 +24,7 @@ loadvm_postcopy_ram_handle_discard_end(void) "" + loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud" + loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d" + loadvm_process_command_ping(uint32_t val) "0x%x" ++loadvm_approve_switchover(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" + postcopy_ram_listen_thread_exit(void) "" + postcopy_ram_listen_thread_start(void) "" + qemu_savevm_send_postcopy_advise(void) "" +@@ -180,6 +182,7 @@ source_return_path_thread_loop_top(void) "" + source_return_path_thread_pong(uint32_t val) "0x%x" + source_return_path_thread_shut(uint32_t val) "0x%x" + source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32 ++source_return_path_thread_switchover_acked(void) "" + migration_thread_low_pending(uint64_t pending) "%" PRIu64 + migrate_transferred(uint64_t tranferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" 
PRIu64 " max_size %" PRId64 + process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch b/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch new file mode 100644 index 0000000..f873f3f --- /dev/null +++ b/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch @@ -0,0 +1,431 @@ +From eaccfc91b34f93dcaf597e6b39f78741da618ff3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 08/37] migration: Make all functions check have the same + format +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/28] 774df2a81502d3eab5d5b8f64fa9b69f8be43669 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 8f9c532756c5 +Author: Juan Quintela +Date: Wed Mar 1 23:11:08 2023 +0100 + + migration: Make all functions check have the same format + + Signed-off-by: Juan Quintela + Reviewed-by: Vladimir Sementsov-Ogievskiy + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 153 +++++++++++--------------------------------- + 1 file changed, 39 insertions(+), 114 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index e51d667e14..bcfe244fa9 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,27 +33,21 @@ + + bool migrate_auto_converge(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; + } + + bool migrate_background_snapshot(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return 
s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + + bool migrate_block(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; + } +@@ -61,95 +55,76 @@ bool migrate_block(void) + bool migrate_colo(void) + { + MigrationState *s = migrate_get_current(); ++ + return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + + bool migrate_compress(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; + } + + bool migrate_dirty_bitmaps(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + + bool migrate_events(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; + } + + bool migrate_ignore_shared(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; + } + + bool migrate_late_block_activate(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + + bool migrate_multifd(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + + bool migrate_pause_before_switchover(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; + } + + bool migrate_postcopy_blocktime(void) + { +- MigrationState *s; +- +- s = 
migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; + } + + bool migrate_postcopy_preempt(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; + } + + bool migrate_postcopy_ram(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } +@@ -163,54 +138,42 @@ bool migrate_rdma_pin_all(void) + + bool migrate_release_ram(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + + bool migrate_return_path(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + + bool migrate_validate_uuid(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + + bool migrate_xbzrle(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; + } + + bool migrate_zero_blocks(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } + + bool migrate_zero_copy_send(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } +@@ -224,9 +187,7 @@ bool migrate_postcopy(void) + + bool migrate_tls(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ 
MigrationState *s = migrate_get_current(); + + return s->parameters.tls_creds && *s->parameters.tls_creds; + } +@@ -491,126 +452,98 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + + bool migrate_block_incremental(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.block_incremental; + } + + uint32_t migrate_checkpoint_delay(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.x_checkpoint_delay; + } + + int migrate_compress_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_level; + } + + int migrate_compress_threads(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_threads; + } + + int migrate_compress_wait_thread(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_wait_thread; + } + + uint8_t migrate_cpu_throttle_increment(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_increment; + } + + uint8_t migrate_cpu_throttle_initial(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_initial; + } + + bool migrate_cpu_throttle_tailslow(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_tailslow; + } + + int migrate_decompress_threads(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.decompress_threads; + } + + uint8_t 
migrate_max_cpu_throttle(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_cpu_throttle; + } + + uint64_t migrate_max_bandwidth(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_bandwidth; + } + + int64_t migrate_max_postcopy_bandwidth(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_postcopy_bandwidth; + } + + int migrate_multifd_channels(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.multifd_channels; + } + + MultiFDCompression migrate_multifd_compression(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); + return s->parameters.multifd_compression; +@@ -618,36 +551,28 @@ MultiFDCompression migrate_multifd_compression(void) + + int migrate_multifd_zlib_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.multifd_zlib_level; + } + + int migrate_multifd_zstd_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.multifd_zstd_level; + } + + uint8_t migrate_throttle_trigger_threshold(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.throttle_trigger_threshold; + } + + uint64_t migrate_xbzrle_cache_size(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.xbzrle_cache_size; + } +-- +2.39.3 + diff --git 
a/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch new file mode 100644 index 0000000..ad1de7b --- /dev/null +++ b/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch @@ -0,0 +1,105 @@ +From 886b511e0a225b1c4428c646534d7bcc65bd9e2a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 18:02:34 +0200 +Subject: [PATCH 14/56] migration: Make dirty_sync_count atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [13/50] ef3ae8cdd960e944ba9e73a53d54c9a5a55bb1ce (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 536b5a4e56ec67c958f46e7d46cbd5ac34e5a239) +Signed-off-by: Peter Xu +--- + migration/migration.c | 3 ++- + migration/ram.c | 13 +++++++------ + migration/ram.h | 2 +- + 3 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8f2847d298..8fca751050 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1148,7 +1148,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->normal = stat64_get(&ram_counters.normal); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; +- info->ram->dirty_sync_count = ram_counters.dirty_sync_count; ++ info->ram->dirty_sync_count = ++ stat64_get(&ram_counters.dirty_sync_count); + info->ram->dirty_sync_missed_zero_copy = + stat64_get(&ram_counters.dirty_sync_missed_zero_copy); + info->ram->postcopy_requests = ram_counters.postcopy_requests; +diff --git a/migration/ram.c b/migration/ram.c +index b1722b6071..3c13136559 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -764,7 +764,7 
@@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) + /* We don't care if this fails to allocate a new cache page + * as long as it updated an old one */ + cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, +- ram_counters.dirty_sync_count); ++ stat64_get(&ram_counters.dirty_sync_count)); + } + + #define ENCODING_FLAG_XBZRLE 0x1 +@@ -790,13 +790,13 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, + int encoded_len = 0, bytes_xbzrle; + uint8_t *prev_cached_page; + QEMUFile *file = pss->pss_channel; ++ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); + +- if (!cache_is_cached(XBZRLE.cache, current_addr, +- ram_counters.dirty_sync_count)) { ++ if (!cache_is_cached(XBZRLE.cache, current_addr, generation)) { + xbzrle_counters.cache_miss++; + if (!rs->last_stage) { + if (cache_insert(XBZRLE.cache, current_addr, *current_data, +- ram_counters.dirty_sync_count) == -1) { ++ generation) == -1) { + return -1; + } else { + /* update *current_data when the page has been +@@ -1209,7 +1209,7 @@ static void migration_bitmap_sync(RAMState *rs) + RAMBlock *block; + int64_t end_time; + +- ram_counters.dirty_sync_count++; ++ stat64_add(&ram_counters.dirty_sync_count, 1); + + if (!rs->time_last_bitmap_sync) { + rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +@@ -1246,7 +1246,8 @@ static void migration_bitmap_sync(RAMState *rs) + rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + if (migrate_use_events()) { +- qapi_event_send_migration_pass(ram_counters.dirty_sync_count); ++ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); ++ qapi_event_send_migration_pass(generation); + } + } + +diff --git a/migration/ram.h b/migration/ram.h +index bb52632424..8c0d07c43a 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -42,7 +42,7 @@ + */ + typedef struct { + int64_t dirty_pages_rate; +- int64_t dirty_sync_count; ++ Stat64 dirty_sync_count; + Stat64 
dirty_sync_missed_zero_copy; + Stat64 downtime_bytes; + Stat64 duplicate; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch new file mode 100644 index 0000000..b7b0f60 --- /dev/null +++ b/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch @@ -0,0 +1,92 @@ +From e9ff20d7f7e6c2354f3696e8bca265e535eeb801 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:33:56 +0200 +Subject: [PATCH 11/56] migration: Make dirty_sync_missed_zero_copy atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [10/50] 041230abb087db0e7ffae02b4f85772490b805a0 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 4291823694fd8507831d26e2558d9cd0030841f7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/multifd.c | 2 +- + migration/ram.c | 5 ----- + migration/ram.h | 4 +--- + 4 files changed, 3 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index ca68808b5c..645fb4b3c5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1150,7 +1150,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; + info->ram->dirty_sync_missed_zero_copy = +- ram_counters.dirty_sync_missed_zero_copy; ++ stat64_get(&ram_counters.dirty_sync_missed_zero_copy); + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); +diff --git 
a/migration/multifd.c b/migration/multifd.c +index 1c992abf53..903df2117b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -576,7 +576,7 @@ static int multifd_zero_copy_flush(QIOChannel *c) + return -1; + } + if (ret == 1) { +- dirty_sync_missed_zero_copy(); ++ stat64_add(&ram_counters.dirty_sync_missed_zero_copy, 1); + } + + return ret; +diff --git a/migration/ram.c b/migration/ram.c +index 71320ed27a..93e0a48af4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -472,11 +472,6 @@ void ram_transferred_add(uint64_t bytes) + stat64_add(&ram_counters.transferred, bytes); + } + +-void dirty_sync_missed_zero_copy(void) +-{ +- ram_counters.dirty_sync_missed_zero_copy++; +-} +- + struct MigrationOps { + int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss); + }; +diff --git a/migration/ram.h b/migration/ram.h +index ed70391317..2170c55e67 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -43,7 +43,7 @@ + typedef struct { + int64_t dirty_pages_rate; + int64_t dirty_sync_count; +- uint64_t dirty_sync_missed_zero_copy; ++ Stat64 dirty_sync_missed_zero_copy; + uint64_t downtime_bytes; + Stat64 duplicate; + Stat64 multifd_bytes; +@@ -114,6 +114,4 @@ void ram_write_tracking_prepare(void); + int ram_write_tracking_start(void); + void ram_write_tracking_stop(void); + +-void dirty_sync_missed_zero_copy(void); +- + #endif +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch b/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch new file mode 100644 index 0000000..9b206bc --- /dev/null +++ b/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch @@ -0,0 +1,68 @@ +From 4c6af064277b5445b31db4a598e1c4402ba56452 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:38:11 +0200 +Subject: [PATCH 13/56] migration: Make downtime_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty 
failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [12/50] ebfc16aae8bc4a8c1fec431780a062950e6f50c4 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 296a4ac2aa63038b6b702f2ee8f0f93ae26727ae) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 3a68d93d69..8f2847d298 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1156,7 +1156,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); +- info->ram->downtime_bytes = ram_counters.downtime_bytes; ++ info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + + if (migrate_use_xbzrle()) { +diff --git a/migration/ram.c b/migration/ram.c +index 0b4693215e..b1722b6071 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -467,7 +467,7 @@ void ram_transferred_add(uint64_t bytes) + } else if (migration_in_postcopy()) { + stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { +- ram_counters.downtime_bytes += bytes; ++ stat64_add(&ram_counters.downtime_bytes, bytes); + } + stat64_add(&ram_counters.transferred, bytes); + } +diff --git a/migration/ram.h b/migration/ram.h +index a766b895fa..bb52632424 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -44,7 +44,7 @@ typedef struct { + int64_t dirty_pages_rate; + int64_t dirty_sync_count; + Stat64 dirty_sync_missed_zero_copy; +- uint64_t downtime_bytes; ++ Stat64 downtime_bytes; + Stat64 duplicate; + Stat64 
multifd_bytes; + Stat64 normal; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch b/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch new file mode 100644 index 0000000..b315fdc --- /dev/null +++ b/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch @@ -0,0 +1,99 @@ +From bfcc4bc8f60b541d545f1ea27b1ff156d8092d33 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 23 Nov 2022 20:36:56 +0100 +Subject: [PATCH 10/56] migration: Make multifd_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [9/50] c2bc6b173770a0ea81c3f9d850c583c651647070 (peterx/qemu-kvm) + +In the spirit of: + +commit 394d323bc3451e4d07f13341cb8817fac8dfbadd +Author: Peter Xu +Date: Tue Oct 11 17:55:51 2022 -0400 + + migration: Use atomic ops properly for page accountings + +Reviewed-by: David Edmondson +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit cf671116facf4e29d91fce9c9ffb535385ffac81) +Signed-off-by: Peter Xu +--- + migration/migration.c | 4 ++-- + migration/multifd.c | 4 ++-- + migration/ram.h | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index a91704d35c..ca68808b5c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1153,7 +1153,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + ram_counters.dirty_sync_missed_zero_copy; + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; +- info->ram->multifd_bytes = ram_counters.multifd_bytes; ++ info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; + 
info->ram->precopy_bytes = ram_counters.precopy_bytes; + info->ram->downtime_bytes = ram_counters.downtime_bytes; +@@ -3780,7 +3780,7 @@ static MigThrError migration_detect_error(MigrationState *s) + static uint64_t migration_total_bytes(MigrationState *s) + { + return qemu_file_total_transferred(s->to_dst_file) + +- ram_counters.multifd_bytes; ++ stat64_get(&ram_counters.multifd_bytes); + } + + static void migration_calculate_complete(MigrationState *s) +diff --git a/migration/multifd.c b/migration/multifd.c +index 6ef3a27938..1c992abf53 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -432,9 +432,9 @@ static int multifd_send_pages(QEMUFile *f) + p->pages = pages; + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); +- ram_counters.multifd_bytes += transferred; + qemu_mutex_unlock(&p->mutex); + stat64_add(&ram_counters.transferred, transferred); ++ stat64_add(&ram_counters.multifd_bytes, transferred); + qemu_sem_post(&p->sem); + + return 1; +@@ -627,9 +627,9 @@ int multifd_send_sync_main(QEMUFile *f) + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); +- ram_counters.multifd_bytes += p->packet_len; + qemu_mutex_unlock(&p->mutex); + stat64_add(&ram_counters.transferred, p->packet_len); ++ stat64_add(&ram_counters.multifd_bytes, p->packet_len); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +diff --git a/migration/ram.h b/migration/ram.h +index 7c026b5242..ed70391317 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -46,7 +46,7 @@ typedef struct { + uint64_t dirty_sync_missed_zero_copy; + uint64_t downtime_bytes; + Stat64 duplicate; +- uint64_t multifd_bytes; ++ Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; + int64_t postcopy_requests; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch b/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch 
new file mode 100644 index 0000000..894419a --- /dev/null +++ b/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch @@ -0,0 +1,69 @@ +From e6ff4536a5e5f5bbfda370ecb525d0e066c3ab1c Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 18:04:59 +0200 +Subject: [PATCH 15/56] migration: Make postcopy_requests atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [14/50] d15c6052b77e7ded7bf34c66caa11bf86b75f2e8 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 3c764f9b2bc3e5eb5ed93ab45c2de6d599fef00f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 3 ++- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8fca751050..39501a0ed8 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1152,7 +1152,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + stat64_get(&ram_counters.dirty_sync_count); + info->ram->dirty_sync_missed_zero_copy = + stat64_get(&ram_counters.dirty_sync_missed_zero_copy); +- info->ram->postcopy_requests = ram_counters.postcopy_requests; ++ info->ram->postcopy_requests = ++ stat64_get(&ram_counters.postcopy_requests); + info->ram->page_size = page_size; + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; +diff --git a/migration/ram.c b/migration/ram.c +index 3c13136559..fe69ecaef4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2169,7 +2169,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) + RAMBlock *ramblock; + RAMState *rs = 
ram_state; + +- ram_counters.postcopy_requests++; ++ stat64_add(&ram_counters.postcopy_requests, 1); + RCU_READ_LOCK_GUARD(); + + if (!rbname) { +diff --git a/migration/ram.h b/migration/ram.h +index 8c0d07c43a..afa68521d7 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -49,7 +49,7 @@ typedef struct { + Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +- int64_t postcopy_requests; ++ Stat64 postcopy_requests; + Stat64 precopy_bytes; + int64_t remaining; + Stat64 transferred; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch b/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch new file mode 100644 index 0000000..8e6c177 --- /dev/null +++ b/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch @@ -0,0 +1,68 @@ +From 7e4d4316855f7f6556364eb16828f925b61c80d4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:36:48 +0200 +Subject: [PATCH 12/56] migration: Make precopy_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [11/50] 23bec49b4b8f4d23c2192b401416139e3ca13626 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit b013b5d1f32ef88457e66c7ce576f6475238f97f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 645fb4b3c5..3a68d93d69 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1155,7 +1155,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->page_size = page_size; + info->ram->multifd_bytes = 
stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; +- info->ram->precopy_bytes = ram_counters.precopy_bytes; ++ info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); + info->ram->downtime_bytes = ram_counters.downtime_bytes; + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + +diff --git a/migration/ram.c b/migration/ram.c +index 93e0a48af4..0b4693215e 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -463,7 +463,7 @@ RAMStats ram_counters; + void ram_transferred_add(uint64_t bytes) + { + if (runstate_is_running()) { +- ram_counters.precopy_bytes += bytes; ++ stat64_add(&ram_counters.precopy_bytes, bytes); + } else if (migration_in_postcopy()) { + stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { +diff --git a/migration/ram.h b/migration/ram.h +index 2170c55e67..a766b895fa 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -50,7 +50,7 @@ typedef struct { + Stat64 normal; + Stat64 postcopy_bytes; + int64_t postcopy_requests; +- uint64_t precopy_bytes; ++ Stat64 precopy_bytes; + int64_t remaining; + Stat64 transferred; + } RAMStats; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch b/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch new file mode 100644 index 0000000..0679e89 --- /dev/null +++ b/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch @@ -0,0 +1,270 @@ +From 5a87058eea6ee56f37fb454486c35baaf693d691 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 22 Feb 2023 15:56:45 +0100 +Subject: [PATCH 08/56] migration: Merge ram_counters and ram_atomic_counters +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 
+RH-Commit: [7/50] 90e395de66aa32b886cf151f7996a680190471f5 (peterx/qemu-kvm) + +Using MgrationStats as type for ram_counters mean that we didn't have +to re-declare each value in another struct. The need of atomic +counters have make us to create MigrationAtomicStats for this atomic +counters. + +Create RAMStats type which is a merge of MigrationStats and +MigrationAtomicStats removing unused members. + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu + +--- + +Fix typos found by David Edmondson + +(cherry picked from commit abce5fa16d126ed085ccf8a5b3fe61a1efa20994) +Signed-off-by: Peter Xu +--- + migration/migration.c | 8 ++++---- + migration/multifd.c | 4 ++-- + migration/ram.c | 39 ++++++++++++++++----------------------- + migration/ram.h | 28 +++++++++++++++------------- + 4 files changed, 37 insertions(+), 42 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 99f86bd6c2..a91704d35c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1140,12 +1140,12 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + size_t page_size = qemu_target_page_size(); + + info->ram = g_malloc0(sizeof(*info->ram)); +- info->ram->transferred = stat64_get(&ram_atomic_counters.transferred); ++ info->ram->transferred = stat64_get(&ram_counters.transferred); + info->ram->total = ram_bytes_total(); +- info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate); ++ info->ram->duplicate = stat64_get(&ram_counters.duplicate); + /* legacy value. 
It is not used anymore */ + info->ram->skipped = 0; +- info->ram->normal = stat64_get(&ram_atomic_counters.normal); ++ info->ram->normal = stat64_get(&ram_counters.normal); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; +@@ -1157,7 +1157,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = ram_counters.precopy_bytes; + info->ram->downtime_bytes = ram_counters.downtime_bytes; +- info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes); ++ info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + + if (migrate_use_xbzrle()) { + info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); +diff --git a/migration/multifd.c b/migration/multifd.c +index cbc0dfe39b..01fab01a92 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -433,7 +433,7 @@ static int multifd_send_pages(QEMUFile *f) + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); + ram_counters.multifd_bytes += transferred; +- stat64_add(&ram_atomic_counters.transferred, transferred); ++ stat64_add(&ram_counters.transferred, transferred); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + +@@ -628,7 +628,7 @@ int multifd_send_sync_main(QEMUFile *f) + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); + ram_counters.multifd_bytes += p->packet_len; +- stat64_add(&ram_atomic_counters.transferred, p->packet_len); ++ stat64_add(&ram_counters.transferred, p->packet_len); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +diff --git a/migration/ram.c b/migration/ram.c +index 0e68099bf9..71320ed27a 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -458,25 +458,18 @@ uint64_t ram_bytes_remaining(void) + 0; + } + +-/* +- * NOTE: not all stats in ram_counters are used in 
reality. See comments +- * for struct MigrationAtomicStats. The ultimate result of ram migration +- * counters will be a merged version with both ram_counters and the atomic +- * fields in ram_atomic_counters. +- */ +-MigrationStats ram_counters; +-MigrationAtomicStats ram_atomic_counters; ++RAMStats ram_counters; + + void ram_transferred_add(uint64_t bytes) + { + if (runstate_is_running()) { + ram_counters.precopy_bytes += bytes; + } else if (migration_in_postcopy()) { +- stat64_add(&ram_atomic_counters.postcopy_bytes, bytes); ++ stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { + ram_counters.downtime_bytes += bytes; + } +- stat64_add(&ram_atomic_counters.transferred, bytes); ++ stat64_add(&ram_counters.transferred, bytes); + } + + void dirty_sync_missed_zero_copy(void) +@@ -756,7 +749,7 @@ void mig_throttle_counter_reset(void) + + rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + rs->num_dirty_pages_period = 0; +- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); ++ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + + /** +@@ -1130,8 +1123,8 @@ uint64_t ram_pagesize_summary(void) + + uint64_t ram_get_total_transferred_pages(void) + { +- return stat64_get(&ram_atomic_counters.normal) + +- stat64_get(&ram_atomic_counters.duplicate) + ++ return stat64_get(&ram_counters.normal) + ++ stat64_get(&ram_counters.duplicate) + + compression_counters.pages + xbzrle_counters.pages; + } + +@@ -1192,7 +1185,7 @@ static void migration_trigger_throttle(RAMState *rs) + MigrationState *s = migrate_get_current(); + uint64_t threshold = s->parameters.throttle_trigger_threshold; + uint64_t bytes_xfer_period = +- stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev; ++ stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; + uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; + uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; + +@@ -1255,7 +1248,7 @@ static 
void migration_bitmap_sync(RAMState *rs) + /* reset period counters */ + rs->time_last_bitmap_sync = end_time; + rs->num_dirty_pages_period = 0; +- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); ++ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + if (migrate_use_events()) { + qapi_event_send_migration_pass(ram_counters.dirty_sync_count); +@@ -1331,7 +1324,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, + int len = save_zero_page_to_file(pss, f, block, offset); + + if (len) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + ram_transferred_add(len); + return 1; + } +@@ -1368,9 +1361,9 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + } + + if (bytes_xmit > 0) { +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + } else if (bytes_xmit == 0) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + } + + return true; +@@ -1402,7 +1395,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + } + ram_transferred_add(TARGET_PAGE_SIZE); +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + return 1; + } + +@@ -1458,7 +1451,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, + if (multifd_queue_page(file, block, offset) < 0) { + return -1; + } +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + + return 1; + } +@@ -1497,7 +1490,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + ram_transferred_add(bytes_xmit); + + if (param->zero_page) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + return; + } + +@@ -2632,9 +2625,9 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + uint64_t pages = size / TARGET_PAGE_SIZE; 
+ + if (zero) { +- stat64_add(&ram_atomic_counters.duplicate, pages); ++ stat64_add(&ram_counters.duplicate, pages); + } else { +- stat64_add(&ram_atomic_counters.normal, pages); ++ stat64_add(&ram_counters.normal, pages); + ram_transferred_add(size); + qemu_file_credit_transfer(f, size); + } +diff --git a/migration/ram.h b/migration/ram.h +index 81cbb0947c..7c026b5242 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -35,25 +35,27 @@ + #include "qemu/stats64.h" + + /* +- * These are the migration statistic counters that need to be updated using +- * atomic ops (can be accessed by more than one thread). Here since we +- * cannot modify MigrationStats directly to use Stat64 as it was defined in +- * the QAPI scheme, we define an internal structure to hold them, and we +- * propagate the real values when QMP queries happen. +- * +- * IOW, the corresponding fields within ram_counters on these specific +- * fields will be always zero and not being used at all; they're just +- * placeholders to make it QAPI-compatible. ++ * These are the ram migration statistic counters. It is loosely ++ * based on MigrationStats. We change to Stat64 any counter that ++ * needs to be updated using atomic ops (can be accessed by more than ++ * one thread). 
+ */ + typedef struct { +- Stat64 transferred; ++ int64_t dirty_pages_rate; ++ int64_t dirty_sync_count; ++ uint64_t dirty_sync_missed_zero_copy; ++ uint64_t downtime_bytes; + Stat64 duplicate; ++ uint64_t multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +-} MigrationAtomicStats; ++ int64_t postcopy_requests; ++ uint64_t precopy_bytes; ++ int64_t remaining; ++ Stat64 transferred; ++} RAMStats; + +-extern MigrationAtomicStats ram_atomic_counters; +-extern MigrationStats ram_counters; ++extern RAMStats ram_counters; + extern XBZRLECacheStats xbzrle_counters; + extern CompressionStats compression_counters; + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Minor-control-flow-simplification.patch b/SOURCES/kvm-migration-Minor-control-flow-simplification.patch new file mode 100644 index 0000000..a0dbdd9 --- /dev/null +++ b/SOURCES/kvm-migration-Minor-control-flow-simplification.patch @@ -0,0 +1,52 @@ +From c3bc974ea4b5186a76daa433209c1209d94dd0b7 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 20 Apr 2023 09:35:51 -0500 +Subject: [PATCH 2/2] migration: Minor control flow simplification + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [2/2] 5afd8c25d6f14bdb2a380ecc77bc6c2f2a26df87 (ebblake/centos-qemu-kvm) + +No need to declare a temporary variable. 
+ +Suggested-by: Juan Quintela +Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better") +Signed-off-by: Eric Blake +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 5d39f44d7ac5c63f53d4d0900ceba9521bc27e49) +--- + migration/migration.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index cb0d42c061..08007cef4e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3436,7 +3436,6 @@ static void migration_completion(MigrationState *s) + ret = global_state_store(); + + if (!ret) { +- bool inactivate = !migrate_colo_enabled(); + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + trace_migration_completion_vm_stop(ret); + if (ret >= 0) { +@@ -3444,10 +3443,10 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { +- s->block_inactive = inactivate; ++ s->block_inactive = !migrate_colo_enabled(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, +- inactivate); ++ s->block_inactive); + } + } + qemu_mutex_unlock_iothread(); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch b/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch new file mode 100644 index 0000000..24dcb16 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch @@ -0,0 +1,90 @@ +From 1f5232d611ecaaf61bcac151e7d90b8b452ac161 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:17:23 +0100 +Subject: [PATCH 43/56] migration: Move migrate_announce_params() to option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás 
+RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [42/50] 541be7adc7f81c269058485aef5b14e787b2efe6 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas + +--- + +Fix extra whitespace (fabiano) + +(cherry picked from commit 2682c4eea72c621dfd0fb0151cbd758e81d1bdff) +Signed-off-by: Peter Xu +--- + migration/migration.c | 14 -------------- + migration/options.c | 17 +++++++++++++++++ + 2 files changed, 17 insertions(+), 14 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 724e841eb9..f27ce30be2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -954,20 +954,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + return params; + } + +-AnnounceParameters *migrate_announce_params(void) +-{ +- static AnnounceParameters ap; +- +- MigrationState *s = migrate_get_current(); +- +- ap.initial = s->parameters.announce_initial; +- ap.max = s->parameters.announce_max; +- ap.rounds = s->parameters.announce_rounds; +- ap.step = s->parameters.announce_step; +- +- return &ap; +-} +- + /* + * Return true if we're already in the middle of a migration + * (i.e. 
any of the active or setup states) +diff --git a/migration/options.c b/migration/options.c +index 2cb04fbbd1..418aafac64 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -16,6 +16,7 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" ++#include "migration/misc.h" + #include "migration.h" + #include "ram.h" + #include "options.h" +@@ -589,3 +590,19 @@ uint64_t migrate_xbzrle_cache_size(void) + + return s->parameters.xbzrle_cache_size; + } ++ ++/* parameters helpers */ ++ ++AnnounceParameters *migrate_announce_params(void) ++{ ++ static AnnounceParameters ap; ++ ++ MigrationState *s = migrate_get_current(); ++ ++ ap.initial = s->parameters.announce_initial; ++ ap.max = s->parameters.announce_max; ++ ap.rounds = s->parameters.announce_rounds; ++ ap.step = s->parameters.announce_step; ++ ++ return &ap; ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch new file mode 100644 index 0000000..0e33c4c --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch @@ -0,0 +1,110 @@ +From 9c4f8d869f5bbdd07381f6baad2ed755b07d03f4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:25:44 +0100 +Subject: [PATCH 36/56] migration: Move migrate_cap_set() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás 
+ migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + 3 files changed, 22 insertions(+), 20 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 369cd91796..880a51210e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1666,26 +1666,6 @@ void migrate_set_state(int *state, int old_state, int new_state) + } + } + +-static bool migrate_cap_set(int cap, bool value, Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- bool new_caps[MIGRATION_CAPABILITY__MAX]; +- +- if (migration_is_running(s->state)) { +- error_setg(errp, QERR_MIGRATION_ACTIVE); +- return false; +- } +- +- memcpy(new_caps, s->capabilities, sizeof(new_caps)); +- new_caps[cap] = value; +- +- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { +- return false; +- } +- s->capabilities[cap] = value; +- return true; +-} +- + static void migrate_set_block_incremental(MigrationState *s, bool value) + { + s->parameters.block_incremental = value; +diff --git a/migration/options.c b/migration/options.c +index 4cbe77e35a..f3b2d6e482 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" ++#include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" + #include "migration.h" + #include "ram.h" +@@ -392,6 +393,26 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + return true; + } + ++bool migrate_cap_set(int cap, bool value, Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; ++ ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return false; ++ } ++ ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ new_caps[cap] = value; ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return false; ++ } ++ s->capabilities[cap] = value; ++ return true; ++} ++ + 
MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + { + MigrationCapabilityStatusList *head = NULL, **tail = &head; +diff --git a/migration/options.h b/migration/options.h +index e779f14161..5979e4ff90 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -41,5 +41,6 @@ bool migrate_zero_copy_send(void); + /* capabilities helpers */ + + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); ++bool migrate_cap_set(int cap, bool value, Error **errp); + + #endif +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch new file mode 100644 index 0000000..0d6fa08 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch @@ -0,0 +1,458 @@ +From 3af7c7aaf7407ec14c19e54d52a2229ce4dbb7c5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:05:53 +0100 +Subject: [PATCH 33/56] migration: Move migrate_caps_check() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [32/50] 12999471063d97fffb2b04c6dcb80083b902f963 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 77608706459bd197e25ac1ef54591b9f8a0b46f8) +Signed-off-by: Peter Xu +--- + migration/migration.c | 190 ----------------------------------------- + migration/options.c | 192 ++++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 4 + + 3 files changed, 196 insertions(+), 190 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f7facecd66..d9e30ca918 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -136,39 +136,6 @@ 
enum mig_rp_message_type { + MIG_RP_MSG_MAX + }; + +-/* Migration capabilities set */ +-struct MigrateCapsSet { +- int size; /* Capability set size */ +- MigrationCapability caps[]; /* Variadic array of capabilities */ +-}; +-typedef struct MigrateCapsSet MigrateCapsSet; +- +-/* Define and initialize MigrateCapsSet */ +-#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \ +- MigrateCapsSet _name = { \ +- .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ +- .caps = { __VA_ARGS__ } \ +- } +- +-/* Background-snapshot compatibility check list */ +-static const +-INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, +- MIGRATION_CAPABILITY_POSTCOPY_RAM, +- MIGRATION_CAPABILITY_DIRTY_BITMAPS, +- MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, +- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, +- MIGRATION_CAPABILITY_RETURN_PATH, +- MIGRATION_CAPABILITY_MULTIFD, +- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, +- MIGRATION_CAPABILITY_AUTO_CONVERGE, +- MIGRATION_CAPABILITY_RELEASE_RAM, +- MIGRATION_CAPABILITY_RDMA_PIN_ALL, +- MIGRATION_CAPABILITY_COMPRESS, +- MIGRATION_CAPABILITY_XBZRLE, +- MIGRATION_CAPABILITY_X_COLO, +- MIGRATION_CAPABILITY_VALIDATE_UUID, +- MIGRATION_CAPABILITY_ZERO_COPY_SEND); +- + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +@@ -1235,163 +1202,6 @@ static void fill_source_migration_info(MigrationInfo *info) + info->status = state; + } + +-typedef enum WriteTrackingSupport { +- WT_SUPPORT_UNKNOWN = 0, +- WT_SUPPORT_ABSENT, +- WT_SUPPORT_AVAILABLE, +- WT_SUPPORT_COMPATIBLE +-} WriteTrackingSupport; +- +-static +-WriteTrackingSupport migrate_query_write_tracking(void) +-{ +- /* Check if kernel supports required UFFD features */ +- if (!ram_write_tracking_available()) { +- return WT_SUPPORT_ABSENT; +- } +- /* +- * Check if current memory configuration is +- * compatible with required UFFD features. 
+- */ +- if (!ram_write_tracking_compatible()) { +- return WT_SUPPORT_AVAILABLE; +- } +- +- return WT_SUPPORT_COMPATIBLE; +-} +- +-/** +- * @migration_caps_check - check capability compatibility +- * +- * @old_caps: old capability list +- * @new_caps: new capability list +- * @errp: set *errp if the check failed, with reason +- * +- * Returns true if check passed, otherwise false. +- */ +-static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) +-{ +- MigrationIncomingState *mis = migration_incoming_get_current(); +- +-#ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { +- error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " +- "block migration"); +- error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); +- return false; +- } +-#endif +- +-#ifndef CONFIG_REPLICATION +- if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { +- error_setg(errp, "QEMU compiled without replication module" +- " can't enable COLO"); +- error_append_hint(errp, "Please enable replication before COLO.\n"); +- return false; +- } +-#endif +- +- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { +- /* This check is reasonably expensive, so only when it's being +- * set the first time, also it's only the destination that needs +- * special support. 
+- */ +- if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && +- runstate_check(RUN_STATE_INMIGRATE) && +- !postcopy_ram_supported_by_host(mis)) { +- /* postcopy_ram_supported_by_host will have emitted a more +- * detailed message +- */ +- error_setg(errp, "Postcopy is not supported"); +- return false; +- } +- +- if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { +- error_setg(errp, "Postcopy is not compatible with ignore-shared"); +- return false; +- } +- } +- +- if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { +- WriteTrackingSupport wt_support; +- int idx; +- /* +- * Check if 'background-snapshot' capability is supported by +- * host kernel and compatible with guest memory configuration. +- */ +- wt_support = migrate_query_write_tracking(); +- if (wt_support < WT_SUPPORT_AVAILABLE) { +- error_setg(errp, "Background-snapshot is not supported by host kernel"); +- return false; +- } +- if (wt_support < WT_SUPPORT_COMPATIBLE) { +- error_setg(errp, "Background-snapshot is not compatible " +- "with guest memory configuration"); +- return false; +- } +- +- /* +- * Check if there are any migration capabilities +- * incompatible with 'background-snapshot'. 
+- */ +- for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { +- int incomp_cap = check_caps_background_snapshot.caps[idx]; +- if (new_caps[incomp_cap]) { +- error_setg(errp, +- "Background-snapshot is not compatible with %s", +- MigrationCapability_str(incomp_cap)); +- return false; +- } +- } +- } +- +-#ifdef CONFIG_LINUX +- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && +- (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || +- new_caps[MIGRATION_CAPABILITY_COMPRESS] || +- new_caps[MIGRATION_CAPABILITY_XBZRLE] || +- migrate_multifd_compression() || +- migrate_use_tls())) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#else +- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { +- error_setg(errp, +- "Zero copy currently only available on Linux"); +- return false; +- } +-#endif +- +- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { +- if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { +- error_setg(errp, "Postcopy preempt requires postcopy-ram"); +- return false; +- } +- +- /* +- * Preempt mode requires urgent pages to be sent in separate +- * channel, OTOH compression logic will disorder all pages into +- * different compression channels, which is not compatible with the +- * preempt assumptions on channel assignments. 
+- */ +- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { +- error_setg(errp, "Postcopy preempt not compatible with compress"); +- return false; +- } +- } +- +- if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { +- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { +- error_setg(errp, "Multifd is not compatible with compress"); +- return false; +- } +- } +- +- return true; +-} +- + static void fill_destination_migration_info(MigrationInfo *info) + { + MigrationIncomingState *mis = migration_incoming_get_current(); +diff --git a/migration/options.c b/migration/options.c +index 9c9b8e5863..367c930f46 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,7 +12,10 @@ + */ + + #include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "sysemu/runstate.h" + #include "migration.h" ++#include "ram.h" + #include "options.h" + + bool migrate_auto_converge(void) +@@ -198,3 +201,192 @@ bool migrate_zero_copy_send(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } ++typedef enum WriteTrackingSupport { ++ WT_SUPPORT_UNKNOWN = 0, ++ WT_SUPPORT_ABSENT, ++ WT_SUPPORT_AVAILABLE, ++ WT_SUPPORT_COMPATIBLE ++} WriteTrackingSupport; ++ ++static ++WriteTrackingSupport migrate_query_write_tracking(void) ++{ ++ /* Check if kernel supports required UFFD features */ ++ if (!ram_write_tracking_available()) { ++ return WT_SUPPORT_ABSENT; ++ } ++ /* ++ * Check if current memory configuration is ++ * compatible with required UFFD features. ++ */ ++ if (!ram_write_tracking_compatible()) { ++ return WT_SUPPORT_AVAILABLE; ++ } ++ ++ return WT_SUPPORT_COMPATIBLE; ++} ++ ++/* Migration capabilities set */ ++struct MigrateCapsSet { ++ int size; /* Capability set size */ ++ MigrationCapability caps[]; /* Variadic array of capabilities */ ++}; ++typedef struct MigrateCapsSet MigrateCapsSet; ++ ++/* Define and initialize MigrateCapsSet */ ++#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) 
\ ++ MigrateCapsSet _name = { \ ++ .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ ++ .caps = { __VA_ARGS__ } \ ++ } ++ ++/* Background-snapshot compatibility check list */ ++static const ++INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, ++ MIGRATION_CAPABILITY_POSTCOPY_RAM, ++ MIGRATION_CAPABILITY_DIRTY_BITMAPS, ++ MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, ++ MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, ++ MIGRATION_CAPABILITY_RETURN_PATH, ++ MIGRATION_CAPABILITY_MULTIFD, ++ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, ++ MIGRATION_CAPABILITY_AUTO_CONVERGE, ++ MIGRATION_CAPABILITY_RELEASE_RAM, ++ MIGRATION_CAPABILITY_RDMA_PIN_ALL, ++ MIGRATION_CAPABILITY_COMPRESS, ++ MIGRATION_CAPABILITY_XBZRLE, ++ MIGRATION_CAPABILITY_X_COLO, ++ MIGRATION_CAPABILITY_VALIDATE_UUID, ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND); ++ ++/** ++ * @migration_caps_check - check capability compatibility ++ * ++ * @old_caps: old capability list ++ * @new_caps: new capability list ++ * @errp: set *errp if the check failed, with reason ++ * ++ * Returns true if check passed, otherwise false. 
++ */ ++bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) ++{ ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++#ifndef CONFIG_LIVE_BLOCK_MIGRATION ++ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { ++ error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " ++ "block migration"); ++ error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); ++ return false; ++ } ++#endif ++ ++#ifndef CONFIG_REPLICATION ++ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { ++ error_setg(errp, "QEMU compiled without replication module" ++ " can't enable COLO"); ++ error_append_hint(errp, "Please enable replication before COLO.\n"); ++ return false; ++ } ++#endif ++ ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ /* This check is reasonably expensive, so only when it's being ++ * set the first time, also it's only the destination that needs ++ * special support. ++ */ ++ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && ++ runstate_check(RUN_STATE_INMIGRATE) && ++ !postcopy_ram_supported_by_host(mis)) { ++ /* postcopy_ram_supported_by_host will have emitted a more ++ * detailed message ++ */ ++ error_setg(errp, "Postcopy is not supported"); ++ return false; ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { ++ error_setg(errp, "Postcopy is not compatible with ignore-shared"); ++ return false; ++ } ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { ++ WriteTrackingSupport wt_support; ++ int idx; ++ /* ++ * Check if 'background-snapshot' capability is supported by ++ * host kernel and compatible with guest memory configuration. 
++ */ ++ wt_support = migrate_query_write_tracking(); ++ if (wt_support < WT_SUPPORT_AVAILABLE) { ++ error_setg(errp, "Background-snapshot is not supported by host kernel"); ++ return false; ++ } ++ if (wt_support < WT_SUPPORT_COMPATIBLE) { ++ error_setg(errp, "Background-snapshot is not compatible " ++ "with guest memory configuration"); ++ return false; ++ } ++ ++ /* ++ * Check if there are any migration capabilities ++ * incompatible with 'background-snapshot'. ++ */ ++ for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { ++ int incomp_cap = check_caps_background_snapshot.caps[idx]; ++ if (new_caps[incomp_cap]) { ++ error_setg(errp, ++ "Background-snapshot is not compatible with %s", ++ MigrationCapability_str(incomp_cap)); ++ return false; ++ } ++ } ++ } ++ ++#ifdef CONFIG_LINUX ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || ++ new_caps[MIGRATION_CAPABILITY_COMPRESS] || ++ new_caps[MIGRATION_CAPABILITY_XBZRLE] || ++ migrate_multifd_compression() || ++ migrate_use_tls())) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#else ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ error_setg(errp, ++ "Zero copy currently only available on Linux"); ++ return false; ++ } ++#endif ++ ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { ++ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ error_setg(errp, "Postcopy preempt requires postcopy-ram"); ++ return false; ++ } ++ ++ /* ++ * Preempt mode requires urgent pages to be sent in separate ++ * channel, OTOH compression logic will disorder all pages into ++ * different compression channels, which is not compatible with the ++ * preempt assumptions on channel assignments. 
++ */ ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { ++ error_setg(errp, "Postcopy preempt not compatible with compress"); ++ return false; ++ } ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { ++ error_setg(errp, "Multifd is not compatible with compress"); ++ return false; ++ } ++ } ++ ++ return true; ++} +diff --git a/migration/options.h b/migration/options.h +index 25c002b37a..e779f14161 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -38,4 +38,8 @@ bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + ++/* capabilities helpers */ ++ ++bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); ++ + #endif +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch new file mode 100644 index 0000000..47c6f83 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch @@ -0,0 +1,136 @@ +From 13da9060fa2dfc666cd6f4b9bc85b7cee0fef45e Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:00:16 +0100 +Subject: [PATCH 24/56] migration: Move migrate_colo_enabled() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [23/50] 4809b1091edee38bd222af41b6313133705785c7 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_colo() to be +consistent with all other capabilities. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5e8046445575dc5879e63c5d07af893d174813d0) +Signed-off-by: Peter Xu +--- + migration/migration.c | 16 +++++----------- + migration/migration.h | 1 - + migration/options.c | 6 ++++++ + migration/options.h | 1 + + 4 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 66ea55be06..59ee0ef82b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2411,7 +2411,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + } + + if (blk || blk_inc) { +- if (migrate_colo_enabled()) { ++ if (migrate_colo()) { + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +@@ -3304,7 +3304,7 @@ static void migration_completion(MigrationState *s) + * have done so in order to remember to reactivate + * them if migration fails or is cancelled. + */ +- s->block_inactive = !migrate_colo_enabled(); ++ s->block_inactive = !migrate_colo(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + s->block_inactive); +@@ -3357,7 +3357,7 @@ static void migration_completion(MigrationState *s) + goto fail; + } + +- if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { ++ if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { + /* COLO does not support postcopy */ + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COLO); +@@ -3435,12 +3435,6 @@ fail: + MIGRATION_STATUS_FAILED); + } + +-bool migrate_colo_enabled(void) +-{ +- MigrationState *s = migrate_get_current(); +- return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; +-} +- + typedef enum MigThrError { + /* No error detected */ + MIG_THR_ERR_NONE = 0, +@@ -3771,7 +3765,7 @@ static void migration_iteration_finish(MigrationState *s) + runstate_set(RUN_STATE_POSTMIGRATE); + break; + case MIGRATION_STATUS_COLO: 
+- if (!migrate_colo_enabled()) { ++ if (!migrate_colo()) { + error_report("%s: critical error: calling COLO code without " + "COLO enabled", __func__); + } +@@ -3967,7 +3961,7 @@ static void *migration_thread(void *opaque) + qemu_savevm_send_postcopy_advise(s->to_dst_file); + } + +- if (migrate_colo_enabled()) { ++ if (migrate_colo()) { + /* Notify migration destination that we enable COLO */ + qemu_savevm_send_colo_enable(s->to_dst_file); + } +diff --git a/migration/migration.h b/migration/migration.h +index a25fed6ef0..42f0c68b6f 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -463,7 +463,6 @@ bool migrate_use_zero_copy_send(void); + int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); +-bool migrate_colo_enabled(void); + + bool migrate_use_block(void); + bool migrate_use_block_incremental(void); +diff --git a/migration/options.c b/migration/options.c +index 88a9a45913..bd33c5da0a 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,6 +33,12 @@ bool migrate_background_snapshot(void) + return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + ++bool migrate_colo(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; ++} ++ + bool migrate_dirty_bitmaps(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 0dfa0af245..2a0ee61ff8 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -18,6 +18,7 @@ + + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); ++bool migrate_colo(void); + bool migrate_dirty_bitmaps(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch new file mode 100644 index 0000000..892ec9e --- /dev/null +++ 
b/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch @@ -0,0 +1,98 @@ +From 710fe195a3c13ffe96795a7a2b550c00319997ea Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:44:20 +0100 +Subject: [PATCH 47/56] migration: Move migrate_postcopy() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [46/50] a4f3455b3524a331f44b481bf7a79318aef5abaa (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit f774fde5d4e97cbfc64dab6622c2c53c5fe5c9fe) +Signed-off-by: Peter Xu +--- + migration/migration.c | 5 ----- + migration/migration.h | 2 -- + migration/options.c | 8 ++++++++ + migration/options.h | 9 +++++++++ + 4 files changed, 17 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f27ce30be2..46a5ea4d42 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2245,11 +2245,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-bool migrate_postcopy(void) +-{ +- return migrate_postcopy_ram() || migrate_dirty_bitmaps(); +-} +- + int migrate_use_tls(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 3ae938b19c..dcf906868d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); + bool migration_in_postcopy(void); + MigrationState *migrate_get_current(void); + +-bool migrate_postcopy(void); +- + int migrate_use_tls(void); + + uint64_t ram_get_total_transferred_pages(void); +diff --git a/migration/options.c b/migration/options.c +index 615534c151..8bd2d949ae 100644 +--- a/migration/options.c 
++++ b/migration/options.c +@@ -204,6 +204,14 @@ bool migrate_zero_copy_send(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } ++ ++/* pseudo capabilities */ ++ ++bool migrate_postcopy(void) ++{ ++ return migrate_postcopy_ram() || migrate_dirty_bitmaps(); ++} ++ + typedef enum WriteTrackingSupport { + WT_SUPPORT_UNKNOWN = 0, + WT_SUPPORT_ABSENT, +diff --git a/migration/options.h b/migration/options.h +index 99f6bbd7a1..093bc907a1 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -38,6 +38,15 @@ bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + ++/* ++ * pseudo capabilities ++ * ++ * These are functions that are used in a similar way to capabilities ++ * check, but they are not a capability. ++ */ ++ ++bool migrate_postcopy(void); ++ + /* capabilities helpers */ + + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch new file mode 100644 index 0000000..f7cb338 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch @@ -0,0 +1,134 @@ +From 276877a71778a5cef0dc5bc843e2679f0fdabb77 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:23:57 +0100 +Subject: [PATCH 30/56] migration: Move migrate_use_block() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [29/50] fcaeb0e07cf828f3cd0d115515b30d913525a0a2 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_block() +to be consistent with all other capabilities. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 9d4b1e5f22a838285ebeb8f0eb7cc8df1161998f) +Signed-off-by: Peter Xu +--- + migration/block.c | 2 +- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/savevm.c | 2 +- + 6 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index 4b167fa5cf..f0977217cf 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -1001,7 +1001,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) + + static bool block_is_active(void *opaque) + { +- return migrate_use_block(); ++ return migrate_block(); + } + + static SaveVMHandlers savevm_block_handlers = { +diff --git a/migration/migration.c b/migration/migration.c +index a4ede4294e..96f82bd165 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2415,7 +2415,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +- if (migrate_use_block() || migrate_use_block_incremental()) { ++ if (migrate_block() || migrate_use_block_incremental()) { + error_setg(errp, "Command options are incompatible with " + "current migration capabilities"); + return false; +@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) + return s->parameters.max_postcopy_bandwidth; + } + +-bool migrate_use_block(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; +-} +- + bool migrate_use_return_path(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index e2bb5b1e2f..d4b68b08a5 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -457,7 +457,6 @@ int migrate_multifd_zstd_level(void); + int migrate_use_tls(void); + uint64_t 
migrate_xbzrle_cache_size(void); + +-bool migrate_use_block(void); + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + bool migrate_use_return_path(void); +diff --git a/migration/options.c b/migration/options.c +index 25264c500e..fe1eadeed6 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,6 +33,15 @@ bool migrate_background_snapshot(void) + return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + ++bool migrate_block(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; ++} ++ + bool migrate_colo(void) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 8f76a88329..e985a5233e 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -18,6 +18,7 @@ + + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); ++bool migrate_block(void); + bool migrate_colo(void); + bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); +diff --git a/migration/savevm.c b/migration/savevm.c +index ebcf571e37..9671211339 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1612,7 +1612,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + return -EINVAL; + } + +- if (migrate_use_block()) { ++ if (migrate_block()) { + error_setg(errp, "Block migration and snapshots are incompatible"); + return -EINVAL; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch b/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch new file mode 100644 index 0000000..3f20289 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch @@ -0,0 +1,121 @@ +From def66503f4ccb97cf8029f88efe8e955edc8d32f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:49:47 +0100 +Subject: [PATCH 39/56] migration: Move migrate_use_block_incremental() to + option.c 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [38/50] 961fda6464df3384fbcee88c726b56a33c26e14e (peterx/qemu-kvm) + +To be consistent with every other parameter, rename to +migrate_block_incremental(). + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 6f8be7080a1f79bf3832cf798fba1697c409c597) +Signed-off-by: Peter Xu +--- + migration/block.c | 2 +- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + 5 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index f0977217cf..6d532ac7a2 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -417,7 +417,7 @@ static int init_blk_migration(QEMUFile *f) + bmds->bulk_completed = 0; + bmds->total_sectors = sectors; + bmds->completed_sectors = 0; +- bmds->shared_base = migrate_use_block_incremental(); ++ bmds->shared_base = migrate_block_incremental(); + + assert(i < num_bs); + bmds_bs[i].bmds = bmds; +diff --git a/migration/migration.c b/migration/migration.c +index 78bca9a93f..724e841eb9 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2157,7 +2157,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +- if (migrate_block() || migrate_use_block_incremental()) { ++ if (migrate_block() || migrate_block_incremental()) { + error_setg(errp, "Command options are incompatible with " + "current migration capabilities"); + return false; +@@ -2273,15 +2273,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && 
*s->parameters.tls_creds; + } + +-bool migrate_use_block_incremental(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.block_incremental; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index 8451e5f2fe..86051af132 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -451,7 +451,6 @@ bool migrate_postcopy(void); + + int migrate_use_tls(void); + +-bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + + uint64_t ram_get_total_transferred_pages(void); +diff --git a/migration/options.c b/migration/options.c +index 8d15be858c..2b6d88b4b9 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -463,6 +463,15 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + + /* parameters */ + ++bool migrate_block_incremental(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.block_incremental; ++} ++ + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index b24ee92283..96d5a8e6e4 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -45,6 +45,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); + + /* parameters */ + ++bool migrate_block_incremental(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch new file mode 100644 index 0000000..8b74183 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch @@ -0,0 +1,183 @@ +From ae183bfc9d7b001d3c4929556b095a76203bc08d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:03:48 +0100 +Subject: 
[PATCH 25/56] migration: Move migrate_use_compression() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [24/50] 126b865f51bd4a1ae3a46411fdcd59033bfc5376 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_compress() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit a7a94d14358dd7b445e20c2f26218ff987747642) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 16 ++++++++-------- + 5 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 59ee0ef82b..c6e32555a8 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1133,7 +1133,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->xbzrle_cache->overflow = xbzrle_counters.overflow; + } + +- if (migrate_use_compression()) { ++ if (migrate_compress()) { + info->compression = g_malloc0(sizeof(*info->compression)); + info->compression->pages = compression_counters.pages; + info->compression->busy = compression_counters.busy; +@@ -2522,15 +2522,6 @@ bool migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-bool migrate_use_compression(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; +-} +- + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 42f0c68b6f..77aa91c840 100644 +--- 
a/migration/migration.h ++++ b/migration/migration.h +@@ -471,7 +471,6 @@ bool migrate_use_return_path(void); + + uint64_t ram_get_total_transferred_pages(void); + +-bool migrate_use_compression(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +diff --git a/migration/options.c b/migration/options.c +index bd33c5da0a..fa7a13d3dc 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -39,6 +39,15 @@ bool migrate_colo(void) + return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + ++bool migrate_compress(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; ++} ++ + bool migrate_dirty_bitmaps(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 2a0ee61ff8..da2193fd94 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -19,6 +19,7 @@ + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); + bool migrate_colo(void); ++bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); +diff --git a/migration/ram.c b/migration/ram.c +index 912ccd89fa..d050d0c5fd 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -586,7 +586,7 @@ static void compress_threads_save_cleanup(void) + { + int i, thread_count; + +- if (!migrate_use_compression() || !comp_param) { ++ if (!migrate_compress() || !comp_param) { + return; + } + +@@ -625,7 +625,7 @@ static int compress_threads_save_setup(void) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + thread_count = migrate_compress_threads(); +@@ -1155,7 +1155,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + rs->xbzrle_bytes_prev = xbzrle_counters.bytes; + } + +- if (migrate_use_compression()) { ++ if (migrate_compress()) { + 
compression_counters.busy_rate = (double)(compression_counters.busy - + rs->compress_thread_busy_prev) / page_count; + rs->compress_thread_busy_prev = compression_counters.busy; +@@ -2270,7 +2270,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) + + static bool save_page_use_compression(RAMState *rs) + { +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return false; + } + +@@ -3734,7 +3734,7 @@ static int wait_for_decompress_done(void) + { + int idx, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + +@@ -3753,7 +3753,7 @@ static void compress_threads_load_cleanup(void) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return; + } + thread_count = migrate_decompress_threads(); +@@ -3794,7 +3794,7 @@ static int compress_threads_load_setup(QEMUFile *f) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + +@@ -4260,7 +4260,7 @@ static int ram_load_precopy(QEMUFile *f) + int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; + /* ADVISE is earlier, it shows the source has the postcopy capability on */ + bool postcopy_advised = migration_incoming_postcopy_advised(); +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch new file mode 100644 index 0000000..41e05c3 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch @@ -0,0 +1,120 @@ +From 940f1eb4347c72edb3e1abc02c8d7e7c95753dcf Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:08:09 +0100 +Subject: [PATCH 26/56] migration: Move migrate_use_events() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [25/50] b3acd949af2a0fae18061d360e4f51dc12d32c6c (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_events() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b890902c9c025b87d02e718eec3090fd3525ab18) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 5 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c6e32555a8..032cd5c050 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -353,7 +353,7 @@ void migration_incoming_state_destroy(void) + + static void migrate_generate_event(int new_state) + { +- if (migrate_use_events()) { ++ if (migrate_events()) { + qapi_event_send_migration(new_state); + } + } +@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_use_events(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; +-} +- + bool migrate_use_multifd(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 77aa91c840..bd06520c19 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -475,7 +475,6 @@ int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); +-bool migrate_use_events(void); + + /* Sending on the return path - generic and then for each message type */ + void 
migrate_send_rp_shut(MigrationIncomingState *mis, +diff --git a/migration/options.c b/migration/options.c +index fa7a13d3dc..d2219ee0e4 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -57,6 +57,15 @@ bool migrate_dirty_bitmaps(void) + return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + ++bool migrate_events(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; ++} ++ + bool migrate_ignore_shared(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index da2193fd94..b998024eba 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -21,6 +21,7 @@ bool migrate_background_snapshot(void); + bool migrate_colo(void); + bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); ++bool migrate_events(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); + bool migrate_pause_before_switchover(void); +diff --git a/migration/ram.c b/migration/ram.c +index d050d0c5fd..ee454a3849 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1246,7 +1246,7 @@ static void migration_bitmap_sync(RAMState *rs) + rs->num_dirty_pages_period = 0; + rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } +- if (migrate_use_events()) { ++ if (migrate_events()) { + uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); + qapi_event_send_migration_pass(generation); + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch new file mode 100644 index 0000000..97d6597 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch @@ -0,0 +1,247 @@ +From afd8fb766af2be5cff97753b026847b91b09a30e Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:10:29 +0100 +Subject: [PATCH 27/56] migration: Move migrate_use_multifd() to options.c +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [26/50] f2d72eae9cc80b2402ef613e809b40aa296d2e4c (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_multifd() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 51b07548f7c31793adc178c7460c5f4369733c61) +Signed-off-by: Peter Xu +--- + migration/migration.c | 19 +++++-------------- + migration/migration.h | 1 - + migration/multifd.c | 16 ++++++++-------- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + migration/socket.c | 2 +- + 7 files changed, 25 insertions(+), 25 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 032cd5c050..e1d7f25786 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -186,7 +186,7 @@ static void migrate_fd_cancel(MigrationState *s); + + static bool migration_needs_multiple_sockets(void) + { +- return migrate_use_multifd() || migrate_postcopy_preempt(); ++ return migrate_multifd() || migrate_postcopy_preempt(); + } + + static bool uri_supports_multi_channels(const char *uri) +@@ -732,7 +732,7 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp) + static bool migration_should_start_incoming(bool main_channel) + { + /* Multifd doesn't start unless all channels are established */ +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + return migration_has_all_channels(); + } + +@@ -759,7 +759,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + uint32_t channel_magic = 0; + int ret = 0; + +- if (migrate_use_multifd() && !migrate_postcopy_ram() && ++ if (migrate_multifd() && 
!migrate_postcopy_ram() && + qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { + /* + * With multiple channels, it is possible that we receive channels +@@ -798,7 +798,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + } else { + /* Multiple connections */ + assert(migration_needs_multiple_sockets()); +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + multifd_recv_new_channel(ioc, &local_err); + } else { + assert(migrate_postcopy_preempt()); +@@ -834,7 +834,7 @@ bool migration_has_all_channels(void) + return false; + } + +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + return multifd_recv_all_channels_created(); + } + +@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_use_multifd(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; +-} +- + int migrate_multifd_channels(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index bd06520c19..49c0e13f41 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,7 +449,6 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-bool migrate_use_multifd(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); +diff --git a/migration/multifd.c b/migration/multifd.c +index 903df2117b..6807328189 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -516,7 +516,7 @@ void multifd_save_cleanup(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + multifd_send_terminate_threads(NULL); +@@ -587,7 +587,7 @@ int multifd_send_sync_main(QEMUFile *f) + int i; + bool flush_zero_copy; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return 0; + } + if (multifd_send_state->pages->num) { +@@ -911,7 
+911,7 @@ int multifd_save_setup(Error **errp) + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + uint8_t i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return 0; + } + +@@ -1016,7 +1016,7 @@ static void multifd_recv_terminate_threads(Error *err) + + void multifd_load_shutdown(void) + { +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + multifd_recv_terminate_threads(NULL); + } + } +@@ -1025,7 +1025,7 @@ void multifd_load_cleanup(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + multifd_recv_terminate_threads(NULL); +@@ -1072,7 +1072,7 @@ void multifd_recv_sync_main(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -1170,7 +1170,7 @@ int multifd_load_setup(Error **errp) + * Return successfully if multiFD recv state is already initialised + * or multiFD is not enabled. + */ +- if (multifd_recv_state || !migrate_use_multifd()) { ++ if (multifd_recv_state || !migrate_multifd()) { + return 0; + } + +@@ -1216,7 +1216,7 @@ bool multifd_recv_all_channels_created(void) + { + int thread_count = migrate_multifd_channels(); + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return true; + } + +diff --git a/migration/options.c b/migration/options.c +index d2219ee0e4..58673fc101 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -84,6 +84,15 @@ bool migrate_late_block_activate(void) + return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + ++bool migrate_multifd(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; ++} ++ + bool migrate_pause_before_switchover(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index b998024eba..d07269ee38 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -24,6 +24,7 @@ bool 
migrate_dirty_bitmaps(void); + bool migrate_events(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); ++bool migrate_multifd(void); + bool migrate_pause_before_switchover(void); + bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); +diff --git a/migration/ram.c b/migration/ram.c +index ee454a3849..859dd7b63f 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2362,7 +2362,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + * if host page size == guest page size the dest guest during run may + * still see partially copied pages which is data corruption. + */ +- if (migrate_use_multifd() && !migration_in_postcopy()) { ++ if (migrate_multifd() && !migration_in_postcopy()) { + return ram_save_multifd_page(pss->pss_channel, block, offset); + } + +diff --git a/migration/socket.c b/migration/socket.c +index ebf9ac41af..f4835a256a 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -183,7 +183,7 @@ socket_start_incoming_migration_internal(SocketAddress *saddr, + + qio_net_listener_set_name(listener, "migration-socket-listener"); + +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + num = migrate_multifd_channels(); + } else if (migrate_postcopy_preempt()) { + num = RAM_CHANNEL_MAX; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch new file mode 100644 index 0000000..b250d40 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch @@ -0,0 +1,138 @@ +From 145b630767dbc7020ddf39b20075f4691f71321a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:25:47 +0100 +Subject: [PATCH 31/56] migration: Move migrate_use_return() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for 
postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [30/50] 5cc150188bcc61b69ea0844253597594ab18fc13 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_return_path() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 38ad1110e368bf91453c0abbd657224d57b65d47) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/rdma.c | 6 +++--- + 5 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 96f82bd165..f7facecd66 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) + return s->parameters.max_postcopy_bandwidth; + } + +-bool migrate_use_return_path(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; +-} +- + bool migrate_use_block_incremental(void) + { + MigrationState *s; +@@ -4175,7 +4166,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + * precopy, only if user specified "return-path" capability would + * QEMU uses the return path. 
+ */ +- if (migrate_postcopy_ram() || migrate_use_return_path()) { ++ if (migrate_postcopy_ram() || migrate_return_path()) { + if (open_return_path_on_source(s, !resume)) { + error_report("Unable to open return-path for postcopy"); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); +diff --git a/migration/migration.h b/migration/migration.h +index d4b68b08a5..24184622a8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -459,7 +459,6 @@ uint64_t migrate_xbzrle_cache_size(void); + + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); +-bool migrate_use_return_path(void); + + uint64_t ram_get_total_transferred_pages(void); + +diff --git a/migration/options.c b/migration/options.c +index fe1eadeed6..2003e413da 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -147,6 +147,15 @@ bool migrate_release_ram(void) + return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + ++bool migrate_return_path(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; ++} ++ + bool migrate_validate_uuid(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index e985a5233e..316efd1063 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -31,6 +31,7 @@ bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); ++bool migrate_return_path(void); + bool migrate_validate_uuid(void); + bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); +diff --git a/migration/rdma.c b/migration/rdma.c +index f35f021963..bf55e2f163 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -3373,7 +3373,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) + * initialize the RDMAContext for return path for postcopy after first + * connection request reached. 
+ */ +- if ((migrate_postcopy() || migrate_use_return_path()) ++ if ((migrate_postcopy() || migrate_return_path()) + && !rdma->is_return_path) { + rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL); + if (rdma_return_path == NULL) { +@@ -3456,7 +3456,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) + } + + /* Accept the second connection request for return path */ +- if ((migrate_postcopy() || migrate_use_return_path()) ++ if ((migrate_postcopy() || migrate_return_path()) + && !rdma->is_return_path) { + qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, + NULL, +@@ -4193,7 +4193,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + /* RDMA postcopy need a separate queue pair for return path */ +- if (migrate_postcopy() || migrate_use_return_path()) { ++ if (migrate_postcopy() || migrate_return_path()) { + rdma_return_path = qemu_rdma_data_init(host_port, errp); + + if (rdma_return_path == NULL) { +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch new file mode 100644 index 0000000..84734af --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch @@ -0,0 +1,134 @@ +From 2e2df63892e191e91216b8253171162f69b93387 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:41:23 +0100 +Subject: [PATCH 49/56] migration: Move migrate_use_tls() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [48/50] 314431b0f5e92d2211e58a8161f32d7b67d69e38 (peterx/qemu-kvm) + +Once there, rename it to migrate_tls() and make it return bool for +consistency. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy + +--- + +Fix typos found by fabiano + +(cherry picked from commit 10d4703be5d884bbbb6ecafe0e8bb270ad6ea937) +Signed-off-by: Peter Xu +--- + migration/migration.c | 9 --------- + migration/migration.h | 2 -- + migration/options.c | 11 ++++++++++- + migration/options.h | 1 + + migration/tls.c | 3 ++- + 5 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c2e109329d..22ef83c619 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2177,15 +2177,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-int migrate_use_tls(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.tls_creds && *s->parameters.tls_creds; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index dcf906868d..2b71df8617 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); + bool migration_in_postcopy(void); + MigrationState *migrate_get_current(void); + +-int migrate_use_tls(void); +- + uint64_t ram_get_total_transferred_pages(void); + + /* Sending on the return path - generic and then for each message type */ +diff --git a/migration/options.c b/migration/options.c +index 8e8753d9be..d4c0714683 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -214,6 +214,15 @@ bool migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + ++bool migrate_tls(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.tls_creds && *s->parameters.tls_creds; ++} ++ + typedef enum WriteTrackingSupport { + WT_SUPPORT_UNKNOWN = 0, + WT_SUPPORT_ABSENT, +@@ -363,7 +372,7 @@ bool migrate_caps_check(bool *old_caps, bool 
*new_caps, Error **errp) + new_caps[MIGRATION_CAPABILITY_COMPRESS] || + new_caps[MIGRATION_CAPABILITY_XBZRLE] || + migrate_multifd_compression() || +- migrate_use_tls())) { ++ migrate_tls())) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); + return false; +diff --git a/migration/options.h b/migration/options.h +index 1b78fa9f3d..13318a16c7 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -46,6 +46,7 @@ bool migrate_zero_copy_send(void); + */ + + bool migrate_postcopy(void); ++bool migrate_tls(void); + + /* capabilities helpers */ + +diff --git a/migration/tls.c b/migration/tls.c +index 4d2166a209..acd38e0b62 100644 +--- a/migration/tls.c ++++ b/migration/tls.c +@@ -22,6 +22,7 @@ + #include "channel.h" + #include "migration.h" + #include "tls.h" ++#include "options.h" + #include "crypto/tlscreds.h" + #include "qemu/error-report.h" + #include "qapi/error.h" +@@ -165,7 +166,7 @@ void migration_tls_channel_connect(MigrationState *s, + + bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc) + { +- if (!migrate_use_tls()) { ++ if (!migrate_tls()) { + return false; + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch new file mode 100644 index 0000000..e3a8bab --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch @@ -0,0 +1,156 @@ +From 2184f7dae0df5fa52deba2dc884e09c6bdbc7b5f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:20:13 +0100 +Subject: [PATCH 29/56] migration: Move migrate_use_xbzrle() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [28/50] 
fc8bee0f691a96e6bd0b41f2511abe507b81fea5 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_xbzrle() +to be consistent with all other capabilities. +We change the type to return bool also for consistency. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 87dca0c9bb63014ef73ad82f7aedea1cb5a822e7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 10 +++++----- + 5 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 1d63718e88..a4ede4294e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1122,7 +1122,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); + info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); + info->xbzrle_cache->bytes = xbzrle_counters.bytes; +@@ -2604,15 +2604,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && *s->parameters.tls_creds; + } + +-int migrate_use_xbzrle(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; +-} +- + uint64_t migrate_xbzrle_cache_size(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index c939f82d53..e2bb5b1e2f 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -455,7 +455,6 @@ int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + + int migrate_use_tls(void); +-int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + + bool 
migrate_use_block(void); +diff --git a/migration/options.c b/migration/options.c +index f357c99996..25264c500e 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -147,6 +147,15 @@ bool migrate_validate_uuid(void) + return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + ++bool migrate_xbzrle(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; ++} ++ + bool migrate_zero_blocks(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index ad22f4d24a..8f76a88329 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -31,6 +31,7 @@ bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); + bool migrate_validate_uuid(void); ++bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + +diff --git a/migration/ram.c b/migration/ram.c +index 859dd7b63f..4576d0d849 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -156,14 +156,14 @@ static struct { + + static void XBZRLE_cache_lock(void) + { +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + qemu_mutex_lock(&XBZRLE.lock); + } + } + + static void XBZRLE_cache_unlock(void) + { +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + qemu_mutex_unlock(&XBZRLE.lock); + } + } +@@ -1137,7 +1137,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + return; + } + +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + double encoded_size, unencoded_size; + + xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - +@@ -1626,7 +1626,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) + /* Flag that we've looped */ + pss->complete_round = true; + /* After the first round, enable XBZRLE. 
*/ +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + rs->xbzrle_enabled = true; + } + } +@@ -2979,7 +2979,7 @@ static int xbzrle_init(void) + { + Error *local_err = NULL; + +- if (!migrate_use_xbzrle()) { ++ if (!migrate_xbzrle()) { + return 0; + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch b/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch new file mode 100644 index 0000000..90031df --- /dev/null +++ b/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch @@ -0,0 +1,167 @@ +From 6eb252887378d639ad2e90dd426a1812d4b72ca6 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:17:14 +0100 +Subject: [PATCH 28/56] migration: Move migrate_use_zero_copy_send() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [27/50] 5a4c2b5e75c62e0f60f9c4121a2756bd140a60d9 (peterx/qemu-kvm) + +Once that we are there, we rename the function to +migrate_zero_copy_send() to be consistent with all other capabilities. + +We can remove the CONFIG_LINUX guard. We already check that we can't +setup this capability in migrate_caps_check(). 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b4bc342c766640e0cb8a0b72f71e0ee5545fb790) +Signed-off-by: Peter Xu +--- + migration/migration.c | 13 +------------ + migration/migration.h | 5 ----- + migration/multifd.c | 8 ++++---- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/socket.c | 2 +- + 6 files changed, 16 insertions(+), 22 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index e1d7f25786..1d63718e88 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1609,7 +1609,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + } + + #ifdef CONFIG_LINUX +- if (migrate_use_zero_copy_send() && ++ if (migrate_zero_copy_send() && + ((params->has_multifd_compression && params->multifd_compression) || + (params->tls_creds && *params->tls_creds))) { + error_setg(errp, +@@ -2595,17 +2595,6 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + +-#ifdef CONFIG_LINUX +-bool migrate_use_zero_copy_send(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; +-} +-#endif +- + int migrate_use_tls(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 49c0e13f41..c939f82d53 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -454,11 +454,6 @@ MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + +-#ifdef CONFIG_LINUX +-bool migrate_use_zero_copy_send(void); +-#else +-#define migrate_use_zero_copy_send() (false) +-#endif + int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); +diff --git a/migration/multifd.c b/migration/multifd.c +index 6807328189..cce3ad6988 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -25,7 
+25,7 @@ + #include "trace.h" + #include "multifd.h" + #include "threadinfo.h" +- ++#include "options.h" + #include "qemu/yank.h" + #include "io/channel-socket.h" + #include "yank_functions.h" +@@ -608,7 +608,7 @@ int multifd_send_sync_main(QEMUFile *f) + * all the dirty bitmaps. + */ + +- flush_zero_copy = migrate_use_zero_copy_send(); ++ flush_zero_copy = migrate_zero_copy_send(); + + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -653,7 +653,7 @@ static void *multifd_send_thread(void *opaque) + MigrationThread *thread = NULL; + Error *local_err = NULL; + int ret = 0; +- bool use_zero_copy_send = migrate_use_zero_copy_send(); ++ bool use_zero_copy_send = migrate_zero_copy_send(); + + thread = MigrationThreadAdd(p->name, qemu_get_thread_id()); + +@@ -945,7 +945,7 @@ int multifd_save_setup(Error **errp) + p->page_size = qemu_target_page_size(); + p->page_count = page_count; + +- if (migrate_use_zero_copy_send()) { ++ if (migrate_zero_copy_send()) { + p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; + } else { + p->write_flags = 0; +diff --git a/migration/options.c b/migration/options.c +index 58673fc101..f357c99996 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -155,3 +155,12 @@ bool migrate_zero_blocks(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } ++ ++bool migrate_zero_copy_send(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; ++} +diff --git a/migration/options.h b/migration/options.h +index d07269ee38..ad22f4d24a 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -32,5 +32,6 @@ bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); + bool migrate_validate_uuid(void); + bool migrate_zero_blocks(void); ++bool migrate_zero_copy_send(void); + + #endif +diff --git a/migration/socket.c b/migration/socket.c +index f4835a256a..1b6f5baefb 100644 +--- 
a/migration/socket.c ++++ b/migration/socket.c +@@ -98,7 +98,7 @@ static void socket_outgoing_migration(QIOTask *task, + + trace_migration_socket_outgoing_connected(data->hostname); + +- if (migrate_use_zero_copy_send() && ++ if (migrate_zero_copy_send() && + !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg(&err, "Zero copy send feature not detected in host kernel"); + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch b/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch new file mode 100644 index 0000000..145b510 --- /dev/null +++ b/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch @@ -0,0 +1,409 @@ +From 0911e025a9dc8a0c85944ac11fb9df72e5ad0677 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 09/37] migration: Move migration_properties to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/28] ff07358afa0c90f13125b177b0e08c74ef1b9905 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit f9436522c8dd +Author: Juan Quintela +Date: Thu Mar 2 12:55:57 2023 +0100 + + migration: Move migration_properties to options.c + + Signed-off-by: Juan Quintela + Reviewed-by: Vladimir Sementsov-Ogievskiy + +Signed-off-by: Cédric Le Goater +--- + migration/migration.c | 157 ------------------------------------------ + migration/options.c | 155 +++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 7 ++ + 3 files changed, 162 insertions(+), 157 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 08f87f2b0e..1ac5f19bc2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -52,8 +52,6 @@ + 
#include "io/channel-tls.h" + #include "migration/colo.h" + #include "hw/boards.h" +-#include "hw/qdev-properties.h" +-#include "hw/qdev-properties-system.h" + #include "monitor/monitor.h" + #include "net/announce.h" + #include "qemu/queue.h" +@@ -65,51 +63,6 @@ + #include "sysemu/qtest.h" + #include "options.h" + +-#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ +- +-/* Time in milliseconds we are allowed to stop the source, +- * for sending the last part */ +-#define DEFAULT_MIGRATE_SET_DOWNTIME 300 +- +-/* Default compression thread count */ +-#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 +-/* Default decompression thread count, usually decompression is at +- * least 4 times as fast as compression.*/ +-#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 +-/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ +-#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 +-/* Define default autoconverge cpu throttle migration parameters */ +-#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 +-#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 +-#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 +-#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 +- +-/* Migration XBZRLE default cache size */ +-#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) +- +-/* The delay time (in ms) between two COLO checkpoints */ +-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) +-#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 +-#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE +-/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ +-#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 +-/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ +-#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 +- +-/* Background transfer rate for postcopy, 0 means unlimited, note +- * that page requests can still exceed this limit. 
+- */ +-#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 +- +-/* +- * Parameters for self_announce_delay giving a stream of RARP/ARP +- * packets after migration. +- */ +-#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 +-#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 +-#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 +-#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 +- + static NotifierList migration_state_notifiers = + NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); + +@@ -3336,116 +3289,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + s->migration_thread_running = true; + } + +-#define DEFINE_PROP_MIG_CAP(name, x) \ +- DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) +- +-static Property migration_properties[] = { +- DEFINE_PROP_BOOL("store-global-state", MigrationState, +- store_global_state, true), +- DEFINE_PROP_BOOL("send-configuration", MigrationState, +- send_configuration, true), +- DEFINE_PROP_BOOL("send-section-footer", MigrationState, +- send_section_footer, true), +- DEFINE_PROP_BOOL("decompress-error-check", MigrationState, +- decompress_error_check, true), +- DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, +- clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), +- DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, +- preempt_pre_7_2, false), +- +- /* Migration parameters */ +- DEFINE_PROP_UINT8("x-compress-level", MigrationState, +- parameters.compress_level, +- DEFAULT_MIGRATE_COMPRESS_LEVEL), +- DEFINE_PROP_UINT8("x-compress-threads", MigrationState, +- parameters.compress_threads, +- DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), +- DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, +- parameters.compress_wait_thread, true), +- DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, +- parameters.decompress_threads, +- DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), +- DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, +- parameters.throttle_trigger_threshold, +- DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), +- 
DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, +- parameters.cpu_throttle_initial, +- DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), +- DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, +- parameters.cpu_throttle_increment, +- DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), +- DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, +- parameters.cpu_throttle_tailslow, false), +- DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, +- parameters.max_bandwidth, MAX_THROTTLE), +- DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, +- parameters.downtime_limit, +- DEFAULT_MIGRATE_SET_DOWNTIME), +- DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, +- parameters.x_checkpoint_delay, +- DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), +- DEFINE_PROP_UINT8("multifd-channels", MigrationState, +- parameters.multifd_channels, +- DEFAULT_MIGRATE_MULTIFD_CHANNELS), +- DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, +- parameters.multifd_compression, +- DEFAULT_MIGRATE_MULTIFD_COMPRESSION), +- DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, +- parameters.multifd_zlib_level, +- DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), +- DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, +- parameters.multifd_zstd_level, +- DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +- DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, +- parameters.xbzrle_cache_size, +- DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +- DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, +- parameters.max_postcopy_bandwidth, +- DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), +- DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, +- parameters.max_cpu_throttle, +- DEFAULT_MIGRATE_MAX_CPU_THROTTLE), +- DEFINE_PROP_SIZE("announce-initial", MigrationState, +- parameters.announce_initial, +- DEFAULT_MIGRATE_ANNOUNCE_INITIAL), +- DEFINE_PROP_SIZE("announce-max", MigrationState, +- parameters.announce_max, +- DEFAULT_MIGRATE_ANNOUNCE_MAX), +- DEFINE_PROP_SIZE("announce-rounds", MigrationState, +- 
parameters.announce_rounds, +- DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), +- DEFINE_PROP_SIZE("announce-step", MigrationState, +- parameters.announce_step, +- DEFAULT_MIGRATE_ANNOUNCE_STEP), +- DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), +- DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), +- DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), +- +- /* Migration capabilities */ +- DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), +- DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), +- DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), +- DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), +- DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), +- DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), +- DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), +- DEFINE_PROP_MIG_CAP("x-postcopy-preempt", +- MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), +- DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), +- DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), +- DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), +- DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), +- DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), +- DEFINE_PROP_MIG_CAP("x-background-snapshot", +- MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), +-#ifdef CONFIG_LINUX +- DEFINE_PROP_MIG_CAP("x-zero-copy-send", +- MIGRATION_CAPABILITY_ZERO_COPY_SEND), +-#endif +- +- DEFINE_PROP_END_OF_LIST(), +-}; +- + static void migration_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +diff --git a/migration/options.c b/migration/options.c +index bcfe244fa9..a76984276d 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -31,6 +31,161 @@ + #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 + #define MAX_MIGRATE_DOWNTIME 
(MAX_MIGRATE_DOWNTIME_SECONDS * 1000) + ++#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ ++ ++/* Time in milliseconds we are allowed to stop the source, ++ * for sending the last part */ ++#define DEFAULT_MIGRATE_SET_DOWNTIME 300 ++ ++/* Default compression thread count */ ++#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 ++/* Default decompression thread count, usually decompression is at ++ * least 4 times as fast as compression.*/ ++#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 ++/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ ++#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 ++/* Define default autoconverge cpu throttle migration parameters */ ++#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 ++#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 ++#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 ++#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 ++ ++/* Migration XBZRLE default cache size */ ++#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) ++ ++/* The delay time (in ms) between two COLO checkpoints */ ++#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) ++#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 ++#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE ++/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ ++#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 ++/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ ++#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 ++ ++/* Background transfer rate for postcopy, 0 means unlimited, note ++ * that page requests can still exceed this limit. ++ */ ++#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 ++ ++/* ++ * Parameters for self_announce_delay giving a stream of RARP/ARP ++ * packets after migration. 
++ */ ++#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 ++#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 ++#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 ++#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 ++ ++#define DEFINE_PROP_MIG_CAP(name, x) \ ++ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) ++ ++Property migration_properties[] = { ++ DEFINE_PROP_BOOL("store-global-state", MigrationState, ++ store_global_state, true), ++ DEFINE_PROP_BOOL("send-configuration", MigrationState, ++ send_configuration, true), ++ DEFINE_PROP_BOOL("send-section-footer", MigrationState, ++ send_section_footer, true), ++ DEFINE_PROP_BOOL("decompress-error-check", MigrationState, ++ decompress_error_check, true), ++ DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, ++ clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), ++ DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, ++ preempt_pre_7_2, false), ++ ++ /* Migration parameters */ ++ DEFINE_PROP_UINT8("x-compress-level", MigrationState, ++ parameters.compress_level, ++ DEFAULT_MIGRATE_COMPRESS_LEVEL), ++ DEFINE_PROP_UINT8("x-compress-threads", MigrationState, ++ parameters.compress_threads, ++ DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), ++ DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, ++ parameters.compress_wait_thread, true), ++ DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, ++ parameters.decompress_threads, ++ DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), ++ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, ++ parameters.throttle_trigger_threshold, ++ DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), ++ DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, ++ parameters.cpu_throttle_initial, ++ DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), ++ DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, ++ parameters.cpu_throttle_increment, ++ DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), ++ DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, ++ parameters.cpu_throttle_tailslow, false), ++ 
DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, ++ parameters.max_bandwidth, MAX_THROTTLE), ++ DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, ++ parameters.downtime_limit, ++ DEFAULT_MIGRATE_SET_DOWNTIME), ++ DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, ++ parameters.x_checkpoint_delay, ++ DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), ++ DEFINE_PROP_UINT8("multifd-channels", MigrationState, ++ parameters.multifd_channels, ++ DEFAULT_MIGRATE_MULTIFD_CHANNELS), ++ DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, ++ parameters.multifd_compression, ++ DEFAULT_MIGRATE_MULTIFD_COMPRESSION), ++ DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, ++ parameters.multifd_zlib_level, ++ DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), ++ DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, ++ parameters.multifd_zstd_level, ++ DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), ++ DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, ++ parameters.xbzrle_cache_size, ++ DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), ++ DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, ++ parameters.max_postcopy_bandwidth, ++ DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), ++ DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, ++ parameters.max_cpu_throttle, ++ DEFAULT_MIGRATE_MAX_CPU_THROTTLE), ++ DEFINE_PROP_SIZE("announce-initial", MigrationState, ++ parameters.announce_initial, ++ DEFAULT_MIGRATE_ANNOUNCE_INITIAL), ++ DEFINE_PROP_SIZE("announce-max", MigrationState, ++ parameters.announce_max, ++ DEFAULT_MIGRATE_ANNOUNCE_MAX), ++ DEFINE_PROP_SIZE("announce-rounds", MigrationState, ++ parameters.announce_rounds, ++ DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), ++ DEFINE_PROP_SIZE("announce-step", MigrationState, ++ parameters.announce_step, ++ DEFAULT_MIGRATE_ANNOUNCE_STEP), ++ DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), ++ DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), ++ DEFINE_PROP_STRING("tls-authz", MigrationState, 
parameters.tls_authz), ++ ++ /* Migration capabilities */ ++ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), ++ DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), ++ DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), ++ DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), ++ DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), ++ DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), ++ DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), ++ DEFINE_PROP_MIG_CAP("x-postcopy-preempt", ++ MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), ++ DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), ++ DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), ++ DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), ++ DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), ++ DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), ++ DEFINE_PROP_MIG_CAP("x-background-snapshot", ++ MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_MIG_CAP("x-zero-copy-send", ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND), ++#endif ++ ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ + bool migrate_auto_converge(void) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 89067e59a0..7b0f7245ad 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -14,6 +14,9 @@ + #ifndef QEMU_MIGRATION_OPTIONS_H + #define QEMU_MIGRATION_OPTIONS_H + ++#include "hw/qdev-properties.h" ++#include "hw/qdev-properties-system.h" ++ + /* constants */ + + /* Amount of time to allocate to each "chunk" of bandwidth-throttled +@@ -21,6 +24,10 @@ + #define BUFFER_DELAY 100 + #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) + ++/* migration properties */ ++ ++extern Property migration_properties[]; ++ + /* capabilities */ + + bool migrate_auto_converge(void); +-- +2.39.3 + diff --git 
a/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch b/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch new file mode 100644 index 0000000..10e5fe7 --- /dev/null +++ b/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch @@ -0,0 +1,94 @@ +From a90cae0dae6382cc1af63dfed8a51a3a27dc4bae Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 11 Sep 2023 16:10:19 +0200 +Subject: [PATCH 2/4] migration: Move more initializations to migrate_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled +RH-Bugzilla: 2229868 +RH-Acked-by: Alex Williamson +RH-Acked-by: Peter Xu +RH-Commit: [2/4] 3706a3308c33046e2658ee511b364087e202708e + +Bugzilla: https://bugzilla.redhat.com/2229868 + +commit f543aa222da183ac37424d1ea3a65e5fb6202732 +Author: Avihai Horon +Date: Wed Sep 6 18:08:50 2023 +0300 + + migration: Move more initializations to migrate_init() + + Initialization of mig_stats, compression_counters and VFIO bytes + transferred is hard-coded in migration code path and snapshot code path. + + Make the code cleaner by initializing them in migrate_init(). 
+ + Suggested-by: Cédric Le Goater + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c, migration/savevm.c + context changes in migrate_prepare() and qemu_savevm_state() due + to missing commit aff3f6606d14 ("migration: Rename ram_counters + to mig_stats") + +Signed-off-by: Cédric Le Goater +--- + migration/migration.c | 14 +++++++------- + migration/savevm.c | 3 --- + 2 files changed, 7 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 5aa9e5dada..a85c8936d9 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1422,6 +1422,13 @@ void migrate_init(MigrationState *s) + s->iteration_initial_bytes = 0; + s->threshold_size = 0; + s->switchover_acked = false; ++ /* ++ * set mig_stats compression_counters memory to zero for a ++ * new migration ++ */ ++ memset(&ram_counters, 0, sizeof(ram_counters)); ++ memset(&compression_counters, 0, sizeof(compression_counters)); ++ migration_reset_vfio_bytes_transferred(); + } + + int migrate_add_blocker_internal(Error *reason, Error **errp) +@@ -1632,13 +1639,6 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + } + + migrate_init(s); +- /* +- * set ram_counters compression_counters memory to zero for a +- * new migration +- */ +- memset(&ram_counters, 0, sizeof(ram_counters)); +- memset(&compression_counters, 0, sizeof(compression_counters)); +- migration_reset_vfio_bytes_transferred(); + + return true; + } +diff --git a/migration/savevm.c b/migration/savevm.c +index 05db79bfad..13c1a9afa1 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1618,9 +1618,6 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + } + + migrate_init(ms); +- memset(&ram_counters, 0, sizeof(ram_counters)); +- memset(&compression_counters, 0, sizeof(compression_counters)); +- migration_reset_vfio_bytes_transferred(); + ms->to_dst_file = f; + + 
qemu_mutex_unlock_iothread(); +-- +2.39.3 + diff --git a/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch b/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch new file mode 100644 index 0000000..ad4510b --- /dev/null +++ b/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch @@ -0,0 +1,317 @@ +From d5ea4c82c44a59ac70313eb1eac77999ca5fde36 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:39:03 +0100 +Subject: [PATCH 37/56] migration: Move parameters functions to option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [36/50] 2540921028025504723e762c0a1d2f295ac5a6d1 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 1dfc4b9e19bcf1ad41a1be9ac82db35b9647c3c1) +Signed-off-by: Peter Xu +--- + migration/migration.c | 91 --------------------------------------- + migration/migration.h | 11 ----- + migration/multifd-zlib.c | 1 + + migration/multifd-zstd.c | 1 + + migration/options.c | 93 ++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 13 ++++++ + 6 files changed, 108 insertions(+), 102 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 880a51210e..7f2e770deb 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2264,79 +2264,6 @@ bool migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-int migrate_compress_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_level; +-} +- +-int migrate_compress_threads(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_threads; 
+-} +- +-int migrate_compress_wait_thread(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_wait_thread; +-} +- +-int migrate_decompress_threads(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.decompress_threads; +-} +- +-int migrate_multifd_channels(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_channels; +-} +- +-MultiFDCompression migrate_multifd_compression(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); +- return s->parameters.multifd_compression; +-} +- +-int migrate_multifd_zlib_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_zlib_level; +-} +- +-int migrate_multifd_zstd_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_zstd_level; +-} +- + int migrate_use_tls(void) + { + MigrationState *s; +@@ -2346,24 +2273,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && *s->parameters.tls_creds; + } + +-uint64_t migrate_xbzrle_cache_size(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.xbzrle_cache_size; +-} +- +-static int64_t migrate_max_postcopy_bandwidth(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.max_postcopy_bandwidth; +-} +- + bool migrate_use_block_incremental(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 24184622a8..8451e5f2fe 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,24 +449,13 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-int migrate_multifd_channels(void); +-MultiFDCompression migrate_multifd_compression(void); +-int migrate_multifd_zlib_level(void); +-int 
migrate_multifd_zstd_level(void); +- + int migrate_use_tls(void); +-uint64_t migrate_xbzrle_cache_size(void); + + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + + uint64_t ram_get_total_transferred_pages(void); + +-int migrate_compress_level(void); +-int migrate_compress_threads(void); +-int migrate_compress_wait_thread(void); +-int migrate_decompress_threads(void); +- + /* Sending on the return path - generic and then for each message type */ + void migrate_send_rp_shut(MigrationIncomingState *mis, + uint32_t value); +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 37770248e1..81701250ad 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -18,6 +18,7 @@ + #include "qapi/error.h" + #include "migration.h" + #include "trace.h" ++#include "options.h" + #include "multifd.h" + + struct zlib_data { +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index f4a8e1ed1f..d1d29e76cc 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -18,6 +18,7 @@ + #include "qapi/error.h" + #include "migration.h" + #include "trace.h" ++#include "options.h" + #include "multifd.h" + + struct zstd_data { +diff --git a/migration/options.c b/migration/options.c +index f3b2d6e482..8d15be858c 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -460,3 +460,96 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + s->capabilities[cap->value->capability] = cap->value->state; + } + } ++ ++/* parameters */ ++ ++int migrate_compress_level(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_level; ++} ++ ++int migrate_compress_threads(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_threads; ++} ++ ++int migrate_compress_wait_thread(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_wait_thread; ++} ++ 
++int migrate_decompress_threads(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.decompress_threads; ++} ++ ++int64_t migrate_max_postcopy_bandwidth(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_postcopy_bandwidth; ++} ++ ++int migrate_multifd_channels(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_channels; ++} ++ ++MultiFDCompression migrate_multifd_compression(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); ++ return s->parameters.multifd_compression; ++} ++ ++int migrate_multifd_zlib_level(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_zlib_level; ++} ++ ++int migrate_multifd_zstd_level(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_zstd_level; ++} ++ ++uint64_t migrate_xbzrle_cache_size(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.xbzrle_cache_size; ++} +diff --git a/migration/options.h b/migration/options.h +index 5979e4ff90..b24ee92283 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -43,4 +43,17 @@ bool migrate_zero_copy_send(void); + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); + bool migrate_cap_set(int cap, bool value, Error **errp); + ++/* parameters */ ++ ++int migrate_compress_level(void); ++int migrate_compress_threads(void); ++int migrate_compress_wait_thread(void); ++int migrate_decompress_threads(void); ++int64_t migrate_max_postcopy_bandwidth(void); ++int migrate_multifd_channels(void); ++MultiFDCompression migrate_multifd_compression(void); ++int migrate_multifd_zlib_level(void); ++int migrate_multifd_zstd_level(void); ++uint64_t migrate_xbzrle_cache_size(void); ++ + #endif +-- +2.39.1 + diff --git 
a/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch b/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch new file mode 100644 index 0000000..10f185b --- /dev/null +++ b/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch @@ -0,0 +1,100 @@ +From d967ec22cdb20e0a846f050a2bc7bd4caa87940d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:18:02 +0100 +Subject: [PATCH 35/56] migration: Move qmp_migrate_set_capabilities() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [34/50] 16b62ca7e06c58d71389c449dc19c11939dd0882 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 45c1de13f09b1fd4ea26f54e6da12aae52f34cb8) +Signed-off-by: Peter Xu +--- + migration/migration.c | 26 -------------------------- + migration/options.c | 26 ++++++++++++++++++++++++++ + 2 files changed, 26 insertions(+), 26 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 3dc8ee4875..369cd91796 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1222,32 +1222,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) + return info; + } + +-void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, +- Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- MigrationCapabilityStatusList *cap; +- bool new_caps[MIGRATION_CAPABILITY__MAX]; +- +- if (migration_is_running(s->state)) { +- error_setg(errp, QERR_MIGRATION_ACTIVE); +- return; +- } +- +- memcpy(new_caps, s->capabilities, sizeof(new_caps)); +- for (cap = params; cap; cap = cap->next) { +- new_caps[cap->value->capability] = cap->value->state; +- } +- +- 
if (!migrate_caps_check(s->capabilities, new_caps, errp)) { +- return; +- } +- +- for (cap = params; cap; cap = cap->next) { +- s->capabilities[cap->value->capability] = cap->value->state; +- } +-} +- + /* + * Check whether the parameters are valid. Error will be put into errp + * (if provided). Return true if valid, otherwise false. +diff --git a/migration/options.c b/migration/options.c +index ff621bdeb3..4cbe77e35a 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -413,3 +413,29 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + + return head; + } ++ ++void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, ++ Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ MigrationCapabilityStatusList *cap; ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; ++ ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return; ++ } ++ ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ for (cap = params; cap; cap = cap->next) { ++ new_caps[cap->value->capability] = cap->value->state; ++ } ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return; ++ } ++ ++ for (cap = params; cap; cap = cap->next) { ++ s->capabilities[cap->value->capability] = cap->value->state; ++ } ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch b/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch new file mode 100644 index 0000000..3685a33 --- /dev/null +++ b/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch @@ -0,0 +1,943 @@ +From 944bf4759d1279c342ddd29c47d47c9670b64625 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:13:16 +0100 +Subject: [PATCH 50/56] migration: Move qmp_migrate_set_parameters() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: 
Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [49/50] b55f7afe868e117d4212f1518b9a37514cc99b33 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 09d6c9658474e8573c5ada58dca8b20fe47dd99e) +Signed-off-by: Peter Xu +--- + migration/migration.c | 420 ------------------------------------------ + migration/options.c | 418 +++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 11 ++ + 3 files changed, 429 insertions(+), 420 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 22ef83c619..08f87f2b0e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -67,19 +67,10 @@ + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +-/* Amount of time to allocate to each "chunk" of bandwidth-throttled +- * data. */ +-#define BUFFER_DELAY 100 +-#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) +- + /* Time in milliseconds we are allowed to stop the source, + * for sending the last part */ + #define DEFAULT_MIGRATE_SET_DOWNTIME 300 + +-/* Maximum migrate downtime set to 2000 seconds */ +-#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 +-#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) +- + /* Default compression thread count */ + #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 + /* Default decompression thread count, usually decompression is at +@@ -1140,417 +1131,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) + return info; + } + +-/* +- * Check whether the parameters are valid. Error will be put into errp +- * (if provided). Return true if valid, otherwise false. 
+- */ +-static bool migrate_params_check(MigrationParameters *params, Error **errp) +-{ +- if (params->has_compress_level && +- (params->compress_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", +- "a value between 0 and 9"); +- return false; +- } +- +- if (params->has_compress_threads && (params->compress_threads < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "compress_threads", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_decompress_threads && (params->decompress_threads < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "decompress_threads", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_throttle_trigger_threshold && +- (params->throttle_trigger_threshold < 1 || +- params->throttle_trigger_threshold > 100)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "throttle_trigger_threshold", +- "an integer in the range of 1 to 100"); +- return false; +- } +- +- if (params->has_cpu_throttle_initial && +- (params->cpu_throttle_initial < 1 || +- params->cpu_throttle_initial > 99)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "cpu_throttle_initial", +- "an integer in the range of 1 to 99"); +- return false; +- } +- +- if (params->has_cpu_throttle_increment && +- (params->cpu_throttle_increment < 1 || +- params->cpu_throttle_increment > 99)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "cpu_throttle_increment", +- "an integer in the range of 1 to 99"); +- return false; +- } +- +- if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "max_bandwidth", +- "an integer in the range of 0 to "stringify(SIZE_MAX) +- " bytes/second"); +- return false; +- } +- +- if (params->has_downtime_limit && +- (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "downtime_limit", +- "an integer in the range of 0 to " +- 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); +- return false; +- } +- +- /* x_checkpoint_delay is now always positive */ +- +- if (params->has_multifd_channels && (params->multifd_channels < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "multifd_channels", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_multifd_zlib_level && +- (params->multifd_zlib_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", +- "a value between 0 and 9"); +- return false; +- } +- +- if (params->has_multifd_zstd_level && +- (params->multifd_zstd_level > 20)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", +- "a value between 0 and 20"); +- return false; +- } +- +- if (params->has_xbzrle_cache_size && +- (params->xbzrle_cache_size < qemu_target_page_size() || +- !is_power_of_2(params->xbzrle_cache_size))) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "xbzrle_cache_size", +- "a power of two no less than the target page size"); +- return false; +- } +- +- if (params->has_max_cpu_throttle && +- (params->max_cpu_throttle < params->cpu_throttle_initial || +- params->max_cpu_throttle > 99)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "max_cpu_throttle", +- "an integer in the range of cpu_throttle_initial to 99"); +- return false; +- } +- +- if (params->has_announce_initial && +- params->announce_initial > 100000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_initial", +- "a value between 0 and 100000"); +- return false; +- } +- if (params->has_announce_max && +- params->announce_max > 100000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_max", +- "a value between 0 and 100000"); +- return false; +- } +- if (params->has_announce_rounds && +- params->announce_rounds > 1000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_rounds", +- "a value between 0 and 1000"); +- return false; +- } +- if (params->has_announce_step && +- 
(params->announce_step < 1 || +- params->announce_step > 10000)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_step", +- "a value between 0 and 10000"); +- return false; +- } +- +- if (params->has_block_bitmap_mapping && +- !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { +- error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); +- return false; +- } +- +-#ifdef CONFIG_LINUX +- if (migrate_zero_copy_send() && +- ((params->has_multifd_compression && params->multifd_compression) || +- (params->tls_creds && *params->tls_creds))) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#endif +- +- return true; +-} +- +-static void migrate_params_test_apply(MigrateSetParameters *params, +- MigrationParameters *dest) +-{ +- *dest = migrate_get_current()->parameters; +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- +- if (params->has_compress_level) { +- dest->compress_level = params->compress_level; +- } +- +- if (params->has_compress_threads) { +- dest->compress_threads = params->compress_threads; +- } +- +- if (params->has_compress_wait_thread) { +- dest->compress_wait_thread = params->compress_wait_thread; +- } +- +- if (params->has_decompress_threads) { +- dest->decompress_threads = params->decompress_threads; +- } +- +- if (params->has_throttle_trigger_threshold) { +- dest->throttle_trigger_threshold = params->throttle_trigger_threshold; +- } +- +- if (params->has_cpu_throttle_initial) { +- dest->cpu_throttle_initial = params->cpu_throttle_initial; +- } +- +- if (params->has_cpu_throttle_increment) { +- dest->cpu_throttle_increment = params->cpu_throttle_increment; +- } +- +- if (params->has_cpu_throttle_tailslow) { +- dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; +- } +- +- if (params->tls_creds) { +- assert(params->tls_creds->type == QTYPE_QSTRING); +- dest->tls_creds = params->tls_creds->u.s; +- } +- 
+- if (params->tls_hostname) { +- assert(params->tls_hostname->type == QTYPE_QSTRING); +- dest->tls_hostname = params->tls_hostname->u.s; +- } +- +- if (params->has_max_bandwidth) { +- dest->max_bandwidth = params->max_bandwidth; +- } +- +- if (params->has_downtime_limit) { +- dest->downtime_limit = params->downtime_limit; +- } +- +- if (params->has_x_checkpoint_delay) { +- dest->x_checkpoint_delay = params->x_checkpoint_delay; +- } +- +- if (params->has_block_incremental) { +- dest->block_incremental = params->block_incremental; +- } +- if (params->has_multifd_channels) { +- dest->multifd_channels = params->multifd_channels; +- } +- if (params->has_multifd_compression) { +- dest->multifd_compression = params->multifd_compression; +- } +- if (params->has_xbzrle_cache_size) { +- dest->xbzrle_cache_size = params->xbzrle_cache_size; +- } +- if (params->has_max_postcopy_bandwidth) { +- dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; +- } +- if (params->has_max_cpu_throttle) { +- dest->max_cpu_throttle = params->max_cpu_throttle; +- } +- if (params->has_announce_initial) { +- dest->announce_initial = params->announce_initial; +- } +- if (params->has_announce_max) { +- dest->announce_max = params->announce_max; +- } +- if (params->has_announce_rounds) { +- dest->announce_rounds = params->announce_rounds; +- } +- if (params->has_announce_step) { +- dest->announce_step = params->announce_step; +- } +- +- if (params->has_block_bitmap_mapping) { +- dest->has_block_bitmap_mapping = true; +- dest->block_bitmap_mapping = params->block_bitmap_mapping; +- } +-} +- +-static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- +- if (params->has_compress_level) { +- s->parameters.compress_level = params->compress_level; +- } +- +- if (params->has_compress_threads) { +- s->parameters.compress_threads = params->compress_threads; +- } 
+- +- if (params->has_compress_wait_thread) { +- s->parameters.compress_wait_thread = params->compress_wait_thread; +- } +- +- if (params->has_decompress_threads) { +- s->parameters.decompress_threads = params->decompress_threads; +- } +- +- if (params->has_throttle_trigger_threshold) { +- s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; +- } +- +- if (params->has_cpu_throttle_initial) { +- s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; +- } +- +- if (params->has_cpu_throttle_increment) { +- s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; +- } +- +- if (params->has_cpu_throttle_tailslow) { +- s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; +- } +- +- if (params->tls_creds) { +- g_free(s->parameters.tls_creds); +- assert(params->tls_creds->type == QTYPE_QSTRING); +- s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); +- } +- +- if (params->tls_hostname) { +- g_free(s->parameters.tls_hostname); +- assert(params->tls_hostname->type == QTYPE_QSTRING); +- s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); +- } +- +- if (params->tls_authz) { +- g_free(s->parameters.tls_authz); +- assert(params->tls_authz->type == QTYPE_QSTRING); +- s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); +- } +- +- if (params->has_max_bandwidth) { +- s->parameters.max_bandwidth = params->max_bandwidth; +- if (s->to_dst_file && !migration_in_postcopy()) { +- qemu_file_set_rate_limit(s->to_dst_file, +- s->parameters.max_bandwidth / XFER_LIMIT_RATIO); +- } +- } +- +- if (params->has_downtime_limit) { +- s->parameters.downtime_limit = params->downtime_limit; +- } +- +- if (params->has_x_checkpoint_delay) { +- s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; +- if (migration_in_colo_state()) { +- colo_checkpoint_notify(s); +- } +- } +- +- if (params->has_block_incremental) { +- s->parameters.block_incremental = params->block_incremental; +- } +- if 
(params->has_multifd_channels) { +- s->parameters.multifd_channels = params->multifd_channels; +- } +- if (params->has_multifd_compression) { +- s->parameters.multifd_compression = params->multifd_compression; +- } +- if (params->has_xbzrle_cache_size) { +- s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; +- xbzrle_cache_resize(params->xbzrle_cache_size, errp); +- } +- if (params->has_max_postcopy_bandwidth) { +- s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; +- if (s->to_dst_file && migration_in_postcopy()) { +- qemu_file_set_rate_limit(s->to_dst_file, +- s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); +- } +- } +- if (params->has_max_cpu_throttle) { +- s->parameters.max_cpu_throttle = params->max_cpu_throttle; +- } +- if (params->has_announce_initial) { +- s->parameters.announce_initial = params->announce_initial; +- } +- if (params->has_announce_max) { +- s->parameters.announce_max = params->announce_max; +- } +- if (params->has_announce_rounds) { +- s->parameters.announce_rounds = params->announce_rounds; +- } +- if (params->has_announce_step) { +- s->parameters.announce_step = params->announce_step; +- } +- +- if (params->has_block_bitmap_mapping) { +- qapi_free_BitmapMigrationNodeAliasList( +- s->parameters.block_bitmap_mapping); +- +- s->parameters.has_block_bitmap_mapping = true; +- s->parameters.block_bitmap_mapping = +- QAPI_CLONE(BitmapMigrationNodeAliasList, +- params->block_bitmap_mapping); +- } +-} +- +-void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) +-{ +- MigrationParameters tmp; +- +- /* TODO Rewrite "" to null instead */ +- if (params->tls_creds +- && params->tls_creds->type == QTYPE_QNULL) { +- qobject_unref(params->tls_creds->u.n); +- params->tls_creds->type = QTYPE_QSTRING; +- params->tls_creds->u.s = strdup(""); +- } +- /* TODO Rewrite "" to null instead */ +- if (params->tls_hostname +- && params->tls_hostname->type == QTYPE_QNULL) { +- 
qobject_unref(params->tls_hostname->u.n); +- params->tls_hostname->type = QTYPE_QSTRING; +- params->tls_hostname->u.s = strdup(""); +- } +- +- migrate_params_test_apply(params, &tmp); +- +- if (!migrate_params_check(&tmp, errp)) { +- /* Invalid parameter */ +- return; +- } +- +- migrate_params_apply(params, errp); +-} +- +- + void qmp_migrate_start_postcopy(Error **errp) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.c b/migration/options.c +index d4c0714683..4701c75a4d 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,17 +12,25 @@ + */ + + #include "qemu/osdep.h" ++#include "exec/target_page.h" + #include "qapi/clone-visitor.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qerror.h" ++#include "qapi/qmp/qnull.h" + #include "sysemu/runstate.h" ++#include "migration/colo.h" + #include "migration/misc.h" + #include "migration.h" ++#include "qemu-file.h" + #include "ram.h" + #include "options.h" + ++/* Maximum migrate downtime set to 2000 seconds */ ++#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 ++#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) ++ + bool migrate_auto_converge(void) + { + MigrationState *s; +@@ -729,3 +737,413 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + + return params; + } ++ ++/* ++ * Check whether the parameters are valid. Error will be put into errp ++ * (if provided). Return true if valid, otherwise false. 
++ */ ++bool migrate_params_check(MigrationParameters *params, Error **errp) ++{ ++ if (params->has_compress_level && ++ (params->compress_level > 9)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value between 0 and 9"); ++ return false; ++ } ++ ++ if (params->has_compress_threads && (params->compress_threads < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "compress_threads", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_decompress_threads && (params->decompress_threads < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "decompress_threads", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_throttle_trigger_threshold && ++ (params->throttle_trigger_threshold < 1 || ++ params->throttle_trigger_threshold > 100)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "throttle_trigger_threshold", ++ "an integer in the range of 1 to 100"); ++ return false; ++ } ++ ++ if (params->has_cpu_throttle_initial && ++ (params->cpu_throttle_initial < 1 || ++ params->cpu_throttle_initial > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "cpu_throttle_initial", ++ "an integer in the range of 1 to 99"); ++ return false; ++ } ++ ++ if (params->has_cpu_throttle_increment && ++ (params->cpu_throttle_increment < 1 || ++ params->cpu_throttle_increment > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "cpu_throttle_increment", ++ "an integer in the range of 1 to 99"); ++ return false; ++ } ++ ++ if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "max_bandwidth", ++ "an integer in the range of 0 to "stringify(SIZE_MAX) ++ " bytes/second"); ++ return false; ++ } ++ ++ if (params->has_downtime_limit && ++ (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "downtime_limit", ++ "an integer in the range of 0 to " ++ 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); ++ return false; ++ } ++ ++ /* x_checkpoint_delay is now always positive */ ++ ++ if (params->has_multifd_channels && (params->multifd_channels < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "multifd_channels", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_multifd_zlib_level && ++ (params->multifd_zlib_level > 9)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", ++ "a value between 0 and 9"); ++ return false; ++ } ++ ++ if (params->has_multifd_zstd_level && ++ (params->multifd_zstd_level > 20)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", ++ "a value between 0 and 20"); ++ return false; ++ } ++ ++ if (params->has_xbzrle_cache_size && ++ (params->xbzrle_cache_size < qemu_target_page_size() || ++ !is_power_of_2(params->xbzrle_cache_size))) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "xbzrle_cache_size", ++ "a power of two no less than the target page size"); ++ return false; ++ } ++ ++ if (params->has_max_cpu_throttle && ++ (params->max_cpu_throttle < params->cpu_throttle_initial || ++ params->max_cpu_throttle > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "max_cpu_throttle", ++ "an integer in the range of cpu_throttle_initial to 99"); ++ return false; ++ } ++ ++ if (params->has_announce_initial && ++ params->announce_initial > 100000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_initial", ++ "a value between 0 and 100000"); ++ return false; ++ } ++ if (params->has_announce_max && ++ params->announce_max > 100000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_max", ++ "a value between 0 and 100000"); ++ return false; ++ } ++ if (params->has_announce_rounds && ++ params->announce_rounds > 1000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_rounds", ++ "a value between 0 and 1000"); ++ return false; ++ } ++ if (params->has_announce_step && ++ 
(params->announce_step < 1 || ++ params->announce_step > 10000)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_step", ++ "a value between 0 and 10000"); ++ return false; ++ } ++ ++ if (params->has_block_bitmap_mapping && ++ !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { ++ error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); ++ return false; ++ } ++ ++#ifdef CONFIG_LINUX ++ if (migrate_zero_copy_send() && ++ ((params->has_multifd_compression && params->multifd_compression) || ++ (params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif ++ ++ return true; ++} ++ ++static void migrate_params_test_apply(MigrateSetParameters *params, ++ MigrationParameters *dest) ++{ ++ *dest = migrate_get_current()->parameters; ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ ++ if (params->has_compress_level) { ++ dest->compress_level = params->compress_level; ++ } ++ ++ if (params->has_compress_threads) { ++ dest->compress_threads = params->compress_threads; ++ } ++ ++ if (params->has_compress_wait_thread) { ++ dest->compress_wait_thread = params->compress_wait_thread; ++ } ++ ++ if (params->has_decompress_threads) { ++ dest->decompress_threads = params->decompress_threads; ++ } ++ ++ if (params->has_throttle_trigger_threshold) { ++ dest->throttle_trigger_threshold = params->throttle_trigger_threshold; ++ } ++ ++ if (params->has_cpu_throttle_initial) { ++ dest->cpu_throttle_initial = params->cpu_throttle_initial; ++ } ++ ++ if (params->has_cpu_throttle_increment) { ++ dest->cpu_throttle_increment = params->cpu_throttle_increment; ++ } ++ ++ if (params->has_cpu_throttle_tailslow) { ++ dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; ++ } ++ ++ if (params->tls_creds) { ++ assert(params->tls_creds->type == QTYPE_QSTRING); ++ dest->tls_creds = params->tls_creds->u.s; ++ } ++ 
++ if (params->tls_hostname) { ++ assert(params->tls_hostname->type == QTYPE_QSTRING); ++ dest->tls_hostname = params->tls_hostname->u.s; ++ } ++ ++ if (params->has_max_bandwidth) { ++ dest->max_bandwidth = params->max_bandwidth; ++ } ++ ++ if (params->has_downtime_limit) { ++ dest->downtime_limit = params->downtime_limit; ++ } ++ ++ if (params->has_x_checkpoint_delay) { ++ dest->x_checkpoint_delay = params->x_checkpoint_delay; ++ } ++ ++ if (params->has_block_incremental) { ++ dest->block_incremental = params->block_incremental; ++ } ++ if (params->has_multifd_channels) { ++ dest->multifd_channels = params->multifd_channels; ++ } ++ if (params->has_multifd_compression) { ++ dest->multifd_compression = params->multifd_compression; ++ } ++ if (params->has_xbzrle_cache_size) { ++ dest->xbzrle_cache_size = params->xbzrle_cache_size; ++ } ++ if (params->has_max_postcopy_bandwidth) { ++ dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; ++ } ++ if (params->has_max_cpu_throttle) { ++ dest->max_cpu_throttle = params->max_cpu_throttle; ++ } ++ if (params->has_announce_initial) { ++ dest->announce_initial = params->announce_initial; ++ } ++ if (params->has_announce_max) { ++ dest->announce_max = params->announce_max; ++ } ++ if (params->has_announce_rounds) { ++ dest->announce_rounds = params->announce_rounds; ++ } ++ if (params->has_announce_step) { ++ dest->announce_step = params->announce_step; ++ } ++ ++ if (params->has_block_bitmap_mapping) { ++ dest->has_block_bitmap_mapping = true; ++ dest->block_bitmap_mapping = params->block_bitmap_mapping; ++ } ++} ++ ++static void migrate_params_apply(MigrateSetParameters *params, Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ ++ if (params->has_compress_level) { ++ s->parameters.compress_level = params->compress_level; ++ } ++ ++ if (params->has_compress_threads) { ++ s->parameters.compress_threads = params->compress_threads; ++ } 
++ ++ if (params->has_compress_wait_thread) { ++ s->parameters.compress_wait_thread = params->compress_wait_thread; ++ } ++ ++ if (params->has_decompress_threads) { ++ s->parameters.decompress_threads = params->decompress_threads; ++ } ++ ++ if (params->has_throttle_trigger_threshold) { ++ s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; ++ } ++ ++ if (params->has_cpu_throttle_initial) { ++ s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; ++ } ++ ++ if (params->has_cpu_throttle_increment) { ++ s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; ++ } ++ ++ if (params->has_cpu_throttle_tailslow) { ++ s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; ++ } ++ ++ if (params->tls_creds) { ++ g_free(s->parameters.tls_creds); ++ assert(params->tls_creds->type == QTYPE_QSTRING); ++ s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); ++ } ++ ++ if (params->tls_hostname) { ++ g_free(s->parameters.tls_hostname); ++ assert(params->tls_hostname->type == QTYPE_QSTRING); ++ s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); ++ } ++ ++ if (params->tls_authz) { ++ g_free(s->parameters.tls_authz); ++ assert(params->tls_authz->type == QTYPE_QSTRING); ++ s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); ++ } ++ ++ if (params->has_max_bandwidth) { ++ s->parameters.max_bandwidth = params->max_bandwidth; ++ if (s->to_dst_file && !migration_in_postcopy()) { ++ qemu_file_set_rate_limit(s->to_dst_file, ++ s->parameters.max_bandwidth / XFER_LIMIT_RATIO); ++ } ++ } ++ ++ if (params->has_downtime_limit) { ++ s->parameters.downtime_limit = params->downtime_limit; ++ } ++ ++ if (params->has_x_checkpoint_delay) { ++ s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; ++ if (migration_in_colo_state()) { ++ colo_checkpoint_notify(s); ++ } ++ } ++ ++ if (params->has_block_incremental) { ++ s->parameters.block_incremental = params->block_incremental; ++ } ++ if 
(params->has_multifd_channels) { ++ s->parameters.multifd_channels = params->multifd_channels; ++ } ++ if (params->has_multifd_compression) { ++ s->parameters.multifd_compression = params->multifd_compression; ++ } ++ if (params->has_xbzrle_cache_size) { ++ s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; ++ xbzrle_cache_resize(params->xbzrle_cache_size, errp); ++ } ++ if (params->has_max_postcopy_bandwidth) { ++ s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; ++ if (s->to_dst_file && migration_in_postcopy()) { ++ qemu_file_set_rate_limit(s->to_dst_file, ++ s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); ++ } ++ } ++ if (params->has_max_cpu_throttle) { ++ s->parameters.max_cpu_throttle = params->max_cpu_throttle; ++ } ++ if (params->has_announce_initial) { ++ s->parameters.announce_initial = params->announce_initial; ++ } ++ if (params->has_announce_max) { ++ s->parameters.announce_max = params->announce_max; ++ } ++ if (params->has_announce_rounds) { ++ s->parameters.announce_rounds = params->announce_rounds; ++ } ++ if (params->has_announce_step) { ++ s->parameters.announce_step = params->announce_step; ++ } ++ ++ if (params->has_block_bitmap_mapping) { ++ qapi_free_BitmapMigrationNodeAliasList( ++ s->parameters.block_bitmap_mapping); ++ ++ s->parameters.has_block_bitmap_mapping = true; ++ s->parameters.block_bitmap_mapping = ++ QAPI_CLONE(BitmapMigrationNodeAliasList, ++ params->block_bitmap_mapping); ++ } ++} ++ ++void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) ++{ ++ MigrationParameters tmp; ++ ++ /* TODO Rewrite "" to null instead */ ++ if (params->tls_creds ++ && params->tls_creds->type == QTYPE_QNULL) { ++ qobject_unref(params->tls_creds->u.n); ++ params->tls_creds->type = QTYPE_QSTRING; ++ params->tls_creds->u.s = strdup(""); ++ } ++ /* TODO Rewrite "" to null instead */ ++ if (params->tls_hostname ++ && params->tls_hostname->type == QTYPE_QNULL) { ++ 
qobject_unref(params->tls_hostname->u.n); ++ params->tls_hostname->type = QTYPE_QSTRING; ++ params->tls_hostname->u.s = strdup(""); ++ } ++ ++ migrate_params_test_apply(params, &tmp); ++ ++ if (!migrate_params_check(&tmp, errp)) { ++ /* Invalid parameter */ ++ return; ++ } ++ ++ migrate_params_apply(params, errp); ++} +diff --git a/migration/options.h b/migration/options.h +index 13318a16c7..89067e59a0 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -14,6 +14,13 @@ + #ifndef QEMU_MIGRATION_OPTIONS_H + #define QEMU_MIGRATION_OPTIONS_H + ++/* constants */ ++ ++/* Amount of time to allocate to each "chunk" of bandwidth-throttled ++ * data. */ ++#define BUFFER_DELAY 100 ++#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) ++ + /* capabilities */ + + bool migrate_auto_converge(void); +@@ -74,4 +81,8 @@ int migrate_multifd_zstd_level(void); + uint8_t migrate_throttle_trigger_threshold(void); + uint64_t migrate_xbzrle_cache_size(void); + ++/* parameters helpers */ ++ ++bool migrate_params_check(MigrationParameters *params, Error **errp); ++ + #endif +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch b/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch new file mode 100644 index 0000000..d2564de --- /dev/null +++ b/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch @@ -0,0 +1,100 @@ +From 00cc3c3598828588619a7b3696819060bddaddb8 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:15:59 +0100 +Subject: [PATCH 34/56] migration: Move qmp_query_migrate_capabilities() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [33/50] dbfa8f1e7aa7e000b4622ce2da12d7d418710f19 
(peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 4d0c6b695bf5252402ebf967f83baebfd2f4b91e) +Signed-off-by: Peter Xu +--- + migration/migration.c | 22 ---------------------- + migration/options.c | 23 +++++++++++++++++++++++ + 2 files changed, 23 insertions(+), 22 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d9e30ca918..3dc8ee4875 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -886,28 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) + migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); + } + +-MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) +-{ +- MigrationCapabilityStatusList *head = NULL, **tail = &head; +- MigrationCapabilityStatus *caps; +- MigrationState *s = migrate_get_current(); +- int i; +- +- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +-#ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (i == MIGRATION_CAPABILITY_BLOCK) { +- continue; +- } +-#endif +- caps = g_malloc0(sizeof(*caps)); +- caps->capability = i; +- caps->state = s->capabilities[i]; +- QAPI_LIST_APPEND(tail, caps); +- } +- +- return head; +-} +- + MigrationParameters *qmp_query_migrate_parameters(Error **errp) + { + MigrationParameters *params; +diff --git a/migration/options.c b/migration/options.c +index 367c930f46..ff621bdeb3 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -13,6 +13,7 @@ + + #include "qemu/osdep.h" + #include "qapi/error.h" ++#include "qapi/qapi-commands-migration.h" + #include "sysemu/runstate.h" + #include "migration.h" + #include "ram.h" +@@ -390,3 +391,25 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + + return true; + } ++ ++MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) ++{ ++ MigrationCapabilityStatusList *head = NULL, **tail = &head; ++ MigrationCapabilityStatus *caps; ++ 
MigrationState *s = migrate_get_current(); ++ int i; ++ ++ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { ++#ifndef CONFIG_LIVE_BLOCK_MIGRATION ++ if (i == MIGRATION_CAPABILITY_BLOCK) { ++ continue; ++ } ++#endif ++ caps = g_malloc0(sizeof(*caps)); ++ caps->capability = i; ++ caps->state = s->capabilities[i]; ++ QAPI_LIST_APPEND(tail, caps); ++ } ++ ++ return head; ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch b/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch new file mode 100644 index 0000000..7339ce0 --- /dev/null +++ b/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch @@ -0,0 +1,226 @@ +From 4782b59a8b0b5762f87505ac7a83b37ddd2e0b3f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 20:28:56 +0100 +Subject: [PATCH 19/56] migration: Pass migrate_caps_check() the old and new + caps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [18/50] df78d680d03f15d7cb7401ad89e68a4fc93fa835 (peterx/qemu-kvm) + +We used to pass the old capabilities array and the new +capabilities as a list. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b02c7fc9ef447787414e6fa67eff75e7b7b30180) +Signed-off-by: Peter Xu +--- + migration/migration.c | 80 +++++++++++++++++-------------------------- + 1 file changed, 31 insertions(+), 49 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d8e5fb6226..e8f596bcfa 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1299,30 +1299,20 @@ WriteTrackingSupport migrate_query_write_tracking(void) + } + + /** +- * @migration_caps_check - check capability validity ++ * @migration_caps_check - check capability compatibility + * +- * @cap_list: old capability list, array of bool +- * @params: new capabilities to be applied soon ++ * @old_caps: old capability list ++ * @new_caps: new capability list + * @errp: set *errp if the check failed, with reason + * + * Returns true if check passed, otherwise false. + */ +-static bool migrate_caps_check(bool *cap_list, +- MigrationCapabilityStatusList *params, +- Error **errp) ++static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + { +- MigrationCapabilityStatusList *cap; +- bool old_postcopy_cap; + MigrationIncomingState *mis = migration_incoming_get_current(); + +- old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; +- +- for (cap = params; cap; cap = cap->next) { +- cap_list[cap->value->capability] = cap->value->state; +- } +- + #ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { ++ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { + error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " + "block migration"); + error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); +@@ -1331,7 +1321,7 @@ static bool migrate_caps_check(bool *cap_list, + #endif + + #ifndef CONFIG_REPLICATION +- if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { ++ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { + error_setg(errp, "QEMU compiled without 
replication module" + " can't enable COLO"); + error_append_hint(errp, "Please enable replication before COLO.\n"); +@@ -1339,12 +1329,13 @@ static bool migrate_caps_check(bool *cap_list, + } + #endif + +- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { + /* This check is reasonably expensive, so only when it's being + * set the first time, also it's only the destination that needs + * special support. + */ +- if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && ++ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && ++ runstate_check(RUN_STATE_INMIGRATE) && + !postcopy_ram_supported_by_host(mis)) { + /* postcopy_ram_supported_by_host will have emitted a more + * detailed message +@@ -1353,13 +1344,13 @@ static bool migrate_caps_check(bool *cap_list, + return false; + } + +- if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { ++ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { + error_setg(errp, "Postcopy is not compatible with ignore-shared"); + return false; + } + } + +- if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { ++ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { + WriteTrackingSupport wt_support; + int idx; + /* +@@ -1383,7 +1374,7 @@ static bool migrate_caps_check(bool *cap_list, + */ + for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { + int incomp_cap = check_caps_background_snapshot.caps[idx]; +- if (cap_list[incomp_cap]) { ++ if (new_caps[incomp_cap]) { + error_setg(errp, + "Background-snapshot is not compatible with %s", + MigrationCapability_str(incomp_cap)); +@@ -1393,10 +1384,10 @@ static bool migrate_caps_check(bool *cap_list, + } + + #ifdef CONFIG_LINUX +- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && +- (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || +- cap_list[MIGRATION_CAPABILITY_COMPRESS] || +- cap_list[MIGRATION_CAPABILITY_XBZRLE] || ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ 
(!new_caps[MIGRATION_CAPABILITY_MULTIFD] || ++ new_caps[MIGRATION_CAPABILITY_COMPRESS] || ++ new_caps[MIGRATION_CAPABILITY_XBZRLE] || + migrate_multifd_compression() || + migrate_use_tls())) { + error_setg(errp, +@@ -1404,15 +1395,15 @@ static bool migrate_caps_check(bool *cap_list, + return false; + } + #else +- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { + error_setg(errp, + "Zero copy currently only available on Linux"); + return false; + } + #endif + +- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { +- if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { ++ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { + error_setg(errp, "Postcopy preempt requires postcopy-ram"); + return false; + } +@@ -1423,14 +1414,14 @@ static bool migrate_caps_check(bool *cap_list, + * different compression channels, which is not compatible with the + * preempt assumptions on channel assignments. 
+ */ +- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, "Postcopy preempt not compatible with compress"); + return false; + } + } + +- if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) { +- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { ++ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, "Multifd is not compatible with compress"); + return false; + } +@@ -1486,15 +1477,19 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + { + MigrationState *s = migrate_get_current(); + MigrationCapabilityStatusList *cap; +- bool cap_list[MIGRATION_CAPABILITY__MAX]; ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; + + if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return; + } + +- memcpy(cap_list, s->capabilities, sizeof(cap_list)); +- if (!migrate_caps_check(cap_list, params, errp)) { ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ for (cap = params; cap; cap = cap->next) { ++ new_caps[cap->value->capability] = cap->value->state; ++ } ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { + return; + } + +@@ -4634,27 +4629,14 @@ static void migration_instance_init(Object *obj) + */ + static bool migration_object_check(MigrationState *ms, Error **errp) + { +- MigrationCapabilityStatusList *head = NULL; + /* Assuming all off */ +- bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; +- int i; ++ bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 }; + + if (!migrate_params_check(&ms->parameters, errp)) { + return false; + } + +- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (ms->capabilities[i]) { +- QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); +- } +- } +- +- ret = migrate_caps_check(cap_list, head, errp); +- +- /* It works with head == NULL */ +- qapi_free_MigrationCapabilityStatusList(head); +- +- return ret; ++ return migrate_caps_check(old_caps, 
ms->capabilities, errp); + } + + static const TypeInfo migration_type = { +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch b/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch new file mode 100644 index 0000000..22acab5 --- /dev/null +++ b/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch @@ -0,0 +1,109 @@ +From 3cecf66655a0dd599666bcac8add2dee85d5651f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 19 Apr 2023 18:16:05 +0200 +Subject: [PATCH 16/56] migration: Rename duplicate to zero_pages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [15/50] 89db3c8b167c0f411ba95ce2730540c0e8f1206b (peterx/qemu-kvm) + +Rest of counters that refer to pages have a _pages suffix. +And historically, this showed the number of pages composed of the same +character, hence the name "duplicated". But since years ago, it +refers to the number of zero_pages. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 1a386e8de5995fb5478ea99baa6d3e71abcf4b80) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 10 +++++----- + migration/ram.h | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 39501a0ed8..c15e2a61ca 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1142,7 +1142,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = stat64_get(&ram_counters.transferred); + info->ram->total = ram_bytes_total(); +- info->ram->duplicate = stat64_get(&ram_counters.duplicate); ++ info->ram->duplicate = stat64_get(&ram_counters.zero_pages); + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; + info->ram->normal = stat64_get(&ram_counters.normal); +diff --git a/migration/ram.c b/migration/ram.c +index fe69ecaef4..19d345a030 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1119,7 +1119,7 @@ uint64_t ram_pagesize_summary(void) + uint64_t ram_get_total_transferred_pages(void) + { + return stat64_get(&ram_counters.normal) + +- stat64_get(&ram_counters.duplicate) + ++ stat64_get(&ram_counters.zero_pages) + + compression_counters.pages + xbzrle_counters.pages; + } + +@@ -1320,7 +1320,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, + int len = save_zero_page_to_file(pss, f, block, offset); + + if (len) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + ram_transferred_add(len); + return 1; + } +@@ -1359,7 +1359,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + if (bytes_xmit > 0) { + stat64_add(&ram_counters.normal, 1); + } else if (bytes_xmit == 0) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + } + + return true; +@@ -1486,7 
+1486,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + ram_transferred_add(bytes_xmit); + + if (param->zero_page) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + return; + } + +@@ -2621,7 +2621,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + uint64_t pages = size / TARGET_PAGE_SIZE; + + if (zero) { +- stat64_add(&ram_counters.duplicate, pages); ++ stat64_add(&ram_counters.zero_pages, pages); + } else { + stat64_add(&ram_counters.normal, pages); + ram_transferred_add(size); +diff --git a/migration/ram.h b/migration/ram.h +index afa68521d7..55258334fe 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -45,7 +45,7 @@ typedef struct { + Stat64 dirty_sync_count; + Stat64 dirty_sync_missed_zero_copy; + Stat64 downtime_bytes; +- Stat64 duplicate; ++ Stat64 zero_pages; + Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch b/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch new file mode 100644 index 0000000..8ad6447 --- /dev/null +++ b/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch @@ -0,0 +1,109 @@ +From 7e27e7ea83856e1a7222ff46d91495f48fb6be4d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 19 Apr 2023 18:19:45 +0200 +Subject: [PATCH 17/56] migration: Rename normal to normal_pages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [16/50] 7df8b946918def9657bbe357861a6d72b5399ac6 (peterx/qemu-kvm) + +Rest of counters that refer to pages has a _pages suffix. +And historically, this showed the number of full pages transferred. 
+The name "normal" referred to the fact that they were sent without any +optimization (compression, xbzrle, zero_page, ...). + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 8c0cda8fa0de0a50148e2c60552afca9cffca643) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 10 +++++----- + migration/ram.h | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c15e2a61ca..f1b3439e5f 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1145,7 +1145,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->duplicate = stat64_get(&ram_counters.zero_pages); + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; +- info->ram->normal = stat64_get(&ram_counters.normal); ++ info->ram->normal = stat64_get(&ram_counters.normal_pages); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = +diff --git a/migration/ram.c b/migration/ram.c +index 19d345a030..229714045a 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1118,7 +1118,7 @@ uint64_t ram_pagesize_summary(void) + + uint64_t ram_get_total_transferred_pages(void) + { +- return stat64_get(&ram_counters.normal) + ++ return stat64_get(&ram_counters.normal_pages) + + stat64_get(&ram_counters.zero_pages) + + compression_counters.pages + xbzrle_counters.pages; + } +@@ -1357,7 +1357,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + } + + if (bytes_xmit > 0) { +- stat64_add(&ram_counters.normal, 1); ++ stat64_add(&ram_counters.normal_pages, 1); + } else if (bytes_xmit == 0) { + stat64_add(&ram_counters.zero_pages, 1); + } +@@ -1391,7 +1391,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + } + ram_transferred_add(TARGET_PAGE_SIZE); +- stat64_add(&ram_counters.normal, 1); ++ 
stat64_add(&ram_counters.normal_pages, 1); + return 1; + } + +@@ -1447,7 +1447,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, + if (multifd_queue_page(file, block, offset) < 0) { + return -1; + } +- stat64_add(&ram_counters.normal, 1); ++ stat64_add(&ram_counters.normal_pages, 1); + + return 1; + } +@@ -2623,7 +2623,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + if (zero) { + stat64_add(&ram_counters.zero_pages, pages); + } else { +- stat64_add(&ram_counters.normal, pages); ++ stat64_add(&ram_counters.normal_pages, pages); + ram_transferred_add(size); + qemu_file_credit_transfer(f, size); + } +diff --git a/migration/ram.h b/migration/ram.h +index 55258334fe..a6e0d70226 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -47,7 +47,7 @@ typedef struct { + Stat64 downtime_bytes; + Stat64 zero_pages; + Stat64 multifd_bytes; +- Stat64 normal; ++ Stat64 normal_pages; + Stat64 postcopy_bytes; + Stat64 postcopy_requests; + Stat64 precopy_bytes; +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch b/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch new file mode 100644 index 0000000..7e78d82 --- /dev/null +++ b/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch @@ -0,0 +1,52 @@ +From c0d377e1bf442a09b82fddbb8588fcddf6439854 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 24 Nov 2022 17:26:19 +0100 +Subject: [PATCH 09/56] migration: Update atomic stats out of the mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [8/50] 88e9dbc9a3e5aef60a7c98c871144904c7062b1f (peterx/qemu-kvm) + +Reviewed-by: David Edmondson +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry 
picked from commit 30fb22cda45bea43a3c0e26049ebdd71a9503ffd) +Signed-off-by: Peter Xu +--- + migration/multifd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 01fab01a92..6ef3a27938 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -433,8 +433,8 @@ static int multifd_send_pages(QEMUFile *f) + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); + ram_counters.multifd_bytes += transferred; +- stat64_add(&ram_counters.transferred, transferred); + qemu_mutex_unlock(&p->mutex); ++ stat64_add(&ram_counters.transferred, transferred); + qemu_sem_post(&p->sem); + + return 1; +@@ -628,8 +628,8 @@ int multifd_send_sync_main(QEMUFile *f) + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); + ram_counters.multifd_bytes += p->packet_len; +- stat64_add(&ram_counters.transferred, p->packet_len); + qemu_mutex_unlock(&p->mutex); ++ stat64_add(&ram_counters.transferred, p->packet_len); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch b/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch new file mode 100644 index 0000000..f179761 --- /dev/null +++ b/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch @@ -0,0 +1,40 @@ +From 8d203baa6cbd1f371e308c2c9d59a5ca7d29dca8 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:55:30 +0100 +Subject: [PATCH 38/56] migration: Use migrate_max_postcopy_bandwidth() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [37/50] 
d62948e9ee40a85ed9b460a583c3b0e43cd5d47f (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5390adec03a7d8bc6bcf5887f726b0ddaeb90681) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 7f2e770deb..78bca9a93f 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3799,7 +3799,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + + if (resume) { + /* This is a resumed migration */ +- rate_limit = s->parameters.max_postcopy_bandwidth / ++ rate_limit = migrate_max_postcopy_bandwidth() / + XFER_LIMIT_RATIO; + } else { + /* This is a fresh new migration */ +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch b/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch deleted file mode 100644 index 387d0b3..0000000 --- a/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001 -From: "manish.mishra" -Date: Tue, 20 Dec 2022 18:44:18 +0000 -Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of - channels -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders -RH-Bugzilla: 2169732 -RH-Acked-by: quintela1 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Dr. David Alan Gilbert -RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm) - -Current logic assumes that channel connections on the destination side are -always established in the same order as the source and the first one will -always be the main channel followed by the multifid or post-copy -preemption channel. 
This may not be always true, as even if a channel has a -connection established on the source side it can be in the pending state on -the destination side and a newer connection can be established first. -Basically causing out of order mapping of channels on the destination side. -Currently, all channels except post-copy preempt send a magic number, this -patch uses that magic number to decide the type of channel. This logic is -applicable only for precopy(multifd) live migration, as mentioned, the -post-copy preempt channel does not send any magic number. Also, tls live -migrations already does tls handshake before creating other channels, so -this issue is not possible with tls, hence this logic is avoided for tls -live migrations. This patch uses read peek to check the magic number of -channels so that current data/control stream management remains -un-effected. - -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrange -Reviewed-by: Juan Quintela -Suggested-by: Daniel P. Berrange -Signed-off-by: manish.mishra -Signed-off-by: Juan Quintela -(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2) -Signed-off-by: Peter Xu ---- - migration/channel.c | 45 +++++++++++++++++++++++++++++++++ - migration/channel.h | 5 ++++ - migration/migration.c | 54 ++++++++++++++++++++++++++++------------ - migration/multifd.c | 19 +++++++------- - migration/multifd.h | 2 +- - migration/postcopy-ram.c | 5 +--- - migration/postcopy-ram.h | 2 +- - 7 files changed, 101 insertions(+), 31 deletions(-) - -diff --git a/migration/channel.c b/migration/channel.c -index 1b0815039f..ca3319a309 100644 ---- a/migration/channel.c -+++ b/migration/channel.c -@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s, - migrate_fd_connect(s, error); - error_free(error); - } -+ -+ -+/** -+ * @migration_channel_read_peek - Peek at migration channel, without -+ * actually removing it from channel buffer. 
-+ * -+ * @ioc: the channel object -+ * @buf: the memory region to read data into -+ * @buflen: the number of bytes to read in @buf -+ * @errp: pointer to a NULL-initialized error object -+ * -+ * Returns 0 if successful, returns -1 and sets @errp if fails. -+ */ -+int migration_channel_read_peek(QIOChannel *ioc, -+ const char *buf, -+ const size_t buflen, -+ Error **errp) -+{ -+ ssize_t len = 0; -+ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; -+ -+ while (true) { -+ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, -+ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); -+ -+ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { -+ error_setg(errp, -+ "Failed to peek at channel"); -+ return -1; -+ } -+ -+ if (len == buflen) { -+ break; -+ } -+ -+ /* 1ms sleep. */ -+ if (qemu_in_coroutine()) { -+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); -+ } else { -+ g_usleep(1000); -+ } -+ } -+ -+ return 0; -+} -diff --git a/migration/channel.h b/migration/channel.h -index 67a461c28a..5bdb8208a7 100644 ---- a/migration/channel.h -+++ b/migration/channel.h -@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s, - QIOChannel *ioc, - const char *hostname, - Error *error_in); -+ -+int migration_channel_read_peek(QIOChannel *ioc, -+ const char *buf, -+ const size_t buflen, -+ Error **errp); - #endif -diff --git a/migration/migration.c b/migration/migration.c -index f485eea5fb..593dbd25de 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -31,6 +31,7 @@ - #include "migration.h" - #include "savevm.h" - #include "qemu-file.h" -+#include "channel.h" - #include "migration/vmstate.h" - #include "block/block.h" - #include "qapi/error.h" -@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -- if (multifd_load_setup(errp) != 0) { -- return false; -- } -- - if (!mis->from_src_file) { - mis->from_src_file = f; - } -@@ -733,31 +730,56 @@ void 
migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - Error *local_err = NULL; -- bool start_migration; - QEMUFile *f; -+ bool default_channel = true; -+ uint32_t channel_magic = 0; -+ int ret = 0; - -- if (!mis->from_src_file) { -- /* The first connection (multifd may have multiple) */ -+ if (migrate_use_multifd() && !migrate_postcopy_ram() && -+ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { -+ /* -+ * With multiple channels, it is possible that we receive channels -+ * out of order on destination side, causing incorrect mapping of -+ * source channels on destination side. Check channel MAGIC to -+ * decide type of channel. Please note this is best effort, postcopy -+ * preempt channel does not send any magic number so avoid it for -+ * postcopy live migration. Also tls live migration already does -+ * tls handshake while initializing main channel so with tls this -+ * issue is not possible. -+ */ -+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, -+ sizeof(channel_magic), &local_err); -+ -+ if (ret != 0) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ -+ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); -+ } else { -+ default_channel = !mis->from_src_file; -+ } -+ -+ if (multifd_load_setup(errp) != 0) { -+ error_setg(errp, "Failed to setup multifd channels"); -+ return; -+ } -+ -+ if (default_channel) { - f = qemu_file_new_input(ioc); - - if (!migration_incoming_setup(f, errp)) { - return; - } -- -- /* -- * Common migration only needs one channel, so we can start -- * right now. Some features need more than one channel, we wait. 
-- */ -- start_migration = !migration_needs_multiple_sockets(); - } else { - /* Multiple connections */ - assert(migration_needs_multiple_sockets()); - if (migrate_use_multifd()) { -- start_migration = multifd_recv_new_channel(ioc, &local_err); -+ multifd_recv_new_channel(ioc, &local_err); - } else { - assert(migrate_postcopy_preempt()); - f = qemu_file_new_input(ioc); -- start_migration = postcopy_preempt_new_channel(mis, f); -+ postcopy_preempt_new_channel(mis, f); - } - if (local_err) { - error_propagate(errp, local_err); -@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - } - } - -- if (start_migration) { -+ if (migration_has_all_channels()) { - /* If it's a recovery, we're done */ - if (postcopy_try_recover()) { - return; -diff --git a/migration/multifd.c b/migration/multifd.c -index 509bbbe3bf..c3385529cf 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp) - uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); - uint8_t i; - -- if (!migrate_use_multifd()) { -+ /* -+ * Return successfully if multiFD recv state is already initialised -+ * or multiFD is not enabled. -+ */ -+ if (multifd_recv_state || !migrate_use_multifd()) { - return 0; - } -+ - if (!migrate_multi_channels_is_allowed()) { - error_setg(errp, "multifd is not supported by current protocol"); - return -1; -@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void) - - /* - * Try to receive all multifd channels to get ready for the migration. -- * - Return true and do not set @errp when correctly receiving all channels; -- * - Return false and do not set @errp when correctly receiving the current one; -- * - Return false and set @errp when failing to receive the current channel. -+ * Sets @errp when failing to receive the current channel. 
- */ --bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) -+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - { - MultiFDRecvParams *p; - Error *local_err = NULL; -@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - "failed to receive packet" - " via multifd channel %d: ", - qatomic_read(&multifd_recv_state->count)); -- return false; -+ return; - } - trace_multifd_recv_new_channel(id); - -@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - id); - multifd_recv_terminate_threads(local_err); - error_propagate(errp, local_err); -- return false; -+ return; - } - p->c = ioc; - object_ref(OBJECT(ioc)); -@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, - QEMU_THREAD_JOINABLE); - qatomic_inc(&multifd_recv_state->count); -- return qatomic_read(&multifd_recv_state->count) == -- migrate_multifd_channels(); - } -diff --git a/migration/multifd.h b/migration/multifd.h -index 519f498643..913e4ba274 100644 ---- a/migration/multifd.h -+++ b/migration/multifd.h -@@ -18,7 +18,7 @@ void multifd_save_cleanup(void); - int multifd_load_setup(Error **errp); - int multifd_load_cleanup(Error **errp); - bool multifd_recv_all_channels_created(void); --bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); -+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); - void multifd_recv_sync_main(void); - int multifd_send_sync_main(QEMUFile *f); - int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 0c55df0e52..b98e95dab0 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) - } - } - --bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) -+void 
postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) - { - /* - * The new loading channel has its own threads, so it needs to be -@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) - qemu_file_set_blocking(file, true); - mis->postcopy_qemufile_dst = file; - trace_postcopy_preempt_new_channel(); -- -- /* Start the migration immediately */ -- return true; - } - - /* -diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h -index 6147bf7d1d..25881c4127 100644 ---- a/migration/postcopy-ram.h -+++ b/migration/postcopy-ram.h -@@ -190,7 +190,7 @@ enum PostcopyChannels { - RAM_CHANNEL_MAX, - }; - --bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); -+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); - int postcopy_preempt_setup(MigrationState *s, Error **errp); - int postcopy_preempt_wait_channel(MigrationState *s); - --- -2.31.1 - diff --git a/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch b/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch new file mode 100644 index 0000000..9451696 --- /dev/null +++ b/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch @@ -0,0 +1,153 @@ +From cfdf5715a2334ad06b5966ec986d134bbd5ba08b Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Dec 2022 12:48:16 +0100 +Subject: [PATCH 05/56] migration: mark mixed functions that can suspend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [4/50] 9f055b526edd06a3440999d5de91e5d624678c7d (peterx/qemu-kvm) + +There should be no paths from a coroutine_fn to aio_poll, however in +practice coroutine_mixed_fn will call aio_poll in the 
!qemu_in_coroutine() +path. By marking mixed functions, we can track accurately the call paths +that execute entirely in coroutine context, and find more missing +coroutine_fn markers. This results in more accurate checks that +coroutine code does not end up blocking. + +If the marking were extended transitively to all functions that call +these ones, static analysis could be done much more efficiently. +However, this is a start and makes it possible to use vrc's path-based +searches to find potential bugs where coroutine_fns call blocking functions. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 394b9407e4c515f96df6647d629ee28cbb86f07c) +Signed-off-by: Peter Xu +--- + include/migration/qemu-file-types.h | 4 ++-- + migration/qemu-file.c | 14 +++++++------- + migration/qemu-file.h | 6 +++--- + 3 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h +index 2867e3da84..1436f9ce92 100644 +--- a/include/migration/qemu-file-types.h ++++ b/include/migration/qemu-file-types.h +@@ -35,7 +35,7 @@ void qemu_put_byte(QEMUFile *f, int v); + void qemu_put_be16(QEMUFile *f, unsigned int v); + void qemu_put_be32(QEMUFile *f, unsigned int v); + void qemu_put_be64(QEMUFile *f, uint64_t v); +-size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); ++size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); + + int qemu_get_byte(QEMUFile *f); + +@@ -161,7 +161,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) + qemu_get_be64s(f, (uint64_t *)pv); + } + +-size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); ++size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]); + + void qemu_put_counted_string(QEMUFile *f, const char *name); + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 102ab3b439..ee04240a21 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -392,7 +392,7 @@ size_t 
ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, + * case if the underlying file descriptor gives a short read, and that can + * happen even on a blocking fd. + */ +-static ssize_t qemu_fill_buffer(QEMUFile *f) ++static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) + { + int len; + int pending; +@@ -585,7 +585,7 @@ void qemu_file_skip(QEMUFile *f, int size) + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ +-size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) ++size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) + { + ssize_t pending; + size_t index; +@@ -633,7 +633,7 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ +-size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) ++size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) + { + size_t pending = size; + size_t done = 0; +@@ -674,7 +674,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) + * Note: Since **buf may get changed, the caller should take care to + * keep a pointer to the original buffer if it needs to deallocate it. + */ +-size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) ++size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) + { + if (size < IO_BUF_SIZE) { + size_t res; +@@ -696,7 +696,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) + * Peeks a single byte from the buffer; this isn't guaranteed to work if + * offset leaves a gap after the previous read/peeked data. 
+ */ +-int qemu_peek_byte(QEMUFile *f, int offset) ++int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset) + { + int index = f->buf_index + offset; + +@@ -713,7 +713,7 @@ int qemu_peek_byte(QEMUFile *f, int offset) + return f->buf[index]; + } + +-int qemu_get_byte(QEMUFile *f) ++int coroutine_mixed_fn qemu_get_byte(QEMUFile *f) + { + int result; + +@@ -894,7 +894,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) + * else 0 + * (Note a 0 length string will return 0 either way) + */ +-size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) ++size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]) + { + size_t len = qemu_get_byte(f); + size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); +diff --git a/migration/qemu-file.h b/migration/qemu-file.h +index 9d0155a2a1..d16cd50448 100644 +--- a/migration/qemu-file.h ++++ b/migration/qemu-file.h +@@ -108,8 +108,8 @@ bool qemu_file_is_writable(QEMUFile *f); + + #include "migration/qemu-file-types.h" + +-size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); +-size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); ++size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); ++size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); + ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, + const uint8_t *p, size_t size); + int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); +@@ -119,7 +119,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); + * is; you aren't guaranteed to be able to peak to +n bytes unless you've + * previously peeked +n-1. 
+ */ +-int qemu_peek_byte(QEMUFile *f, int offset); ++int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); + void qemu_file_skip(QEMUFile *f, int size); + /* + * qemu_file_credit_transfer: +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch b/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch new file mode 100644 index 0000000..4e73c80 --- /dev/null +++ b/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch @@ -0,0 +1,121 @@ +From 96e6914cbfb18bb8287c57b9ac9a6b364d3e7a22 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 22 Feb 2023 17:18:05 +0100 +Subject: [PATCH 20/56] migration: move migration_global_dump() to + migration-hmp-cmds.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [19/50] c8d330a2833c706b9bd78f7154be882e3977ad06 (peterx/qemu-kvm) + +It is only used there, so we can make it static. +Once there, remove spice.h that it is not used. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +Reviewed-by: Philippe Mathieu-Daudé + +--- + +fix David Edmonson ui/qemu-spice.h unintended removal + +(cherry picked from commit c938157713e723165a42cb6e8364adb6fcbd0e22) +Signed-off-by: Peter Xu +--- + include/migration/misc.h | 1 - + migration/migration-hmp-cmds.c | 22 +++++++++++++++++++++- + migration/migration.c | 19 ------------------- + 3 files changed, 21 insertions(+), 21 deletions(-) + +diff --git a/include/migration/misc.h b/include/migration/misc.h +index 8b49841016..5ebe13b4b9 100644 +--- a/include/migration/misc.h ++++ b/include/migration/misc.h +@@ -66,7 +66,6 @@ bool migration_has_finished(MigrationState *); + bool migration_has_failed(MigrationState *); + /* ...and after the device transmission */ + bool migration_in_postcopy_after_devices(MigrationState *); +-void migration_global_dump(Monitor *mon); + /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */ + bool migration_in_incoming_postcopy(void); + /* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */ +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 72519ea99f..71da91967a 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -15,7 +15,6 @@ + + #include "qemu/osdep.h" + #include "block/qapi.h" +-#include "migration/misc.h" + #include "migration/snapshot.h" + #include "monitor/hmp.h" + #include "monitor/monitor.h" +@@ -30,6 +29,27 @@ + #include "qemu/sockets.h" + #include "sysemu/runstate.h" + #include "ui/qemu-spice.h" ++#include "sysemu/sysemu.h" ++#include "migration.h" ++ ++static void migration_global_dump(Monitor *mon) ++{ ++ MigrationState *ms = migrate_get_current(); ++ ++ monitor_printf(mon, "globals:\n"); ++ monitor_printf(mon, "store-global-state: %s\n", ++ ms->store_global_state ? "on" : "off"); ++ monitor_printf(mon, "only-migratable: %s\n", ++ only_migratable ? 
"on" : "off"); ++ monitor_printf(mon, "send-configuration: %s\n", ++ ms->send_configuration ? "on" : "off"); ++ monitor_printf(mon, "send-section-footer: %s\n", ++ ms->send_section_footer ? "on" : "off"); ++ monitor_printf(mon, "decompress-error-check: %s\n", ++ ms->decompress_error_check ? "on" : "off"); ++ monitor_printf(mon, "clear-bitmap-shift: %u\n", ++ ms->clear_bitmap_shift); ++} + + void hmp_info_migrate(Monitor *mon, const QDict *qdict) + { +diff --git a/migration/migration.c b/migration/migration.c +index e8f596bcfa..aa96ffdc5b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -4420,25 +4420,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + s->migration_thread_running = true; + } + +-void migration_global_dump(Monitor *mon) +-{ +- MigrationState *ms = migrate_get_current(); +- +- monitor_printf(mon, "globals:\n"); +- monitor_printf(mon, "store-global-state: %s\n", +- ms->store_global_state ? "on" : "off"); +- monitor_printf(mon, "only-migratable: %s\n", +- only_migratable ? "on" : "off"); +- monitor_printf(mon, "send-configuration: %s\n", +- ms->send_configuration ? "on" : "off"); +- monitor_printf(mon, "send-section-footer: %s\n", +- ms->send_section_footer ? "on" : "off"); +- monitor_printf(mon, "decompress-error-check: %s\n", +- ms->decompress_error_check ? 
"on" : "off"); +- monitor_printf(mon, "clear-bitmap-shift: %u\n", +- ms->clear_bitmap_shift); +-} +- + #define DEFINE_PROP_MIG_CAP(name, x) \ + DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) + +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch b/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch new file mode 100644 index 0000000..7700466 --- /dev/null +++ b/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch @@ -0,0 +1,117 @@ +From 4827d5be5357ab89e0c46f606ad828bf97d36471 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:38 -0400 +Subject: [PATCH 04/56] migration/postcopy: Detect file system on dest host +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [3/50] 121aeeda8a019f79dba6c077c7018bd1c86f3d71 (peterx/qemu-kvm) + +Postcopy requires the memory support userfaultfd to work. Right now we +check it but it's a bit too late (when switching to postcopy migration). + +Do that early right at enabling of postcopy. + +Note that this is still only a best effort because ramblocks can be +dynamically created. We can add check in hostmem creations and fail if +postcopy enabled, but maybe that's too aggressive. + +Still, we have chance to fail the most obvious where we know there's an +existing unsupported ramblock. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit ae30b9b2892b85e6c3d5c0b8d1949c4d77a2954a) +Signed-off-by: Peter Xu +--- + migration/postcopy-ram.c | 34 ++++++++++++++++++++++++++++++---- + 1 file changed, 30 insertions(+), 4 deletions(-) + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 93f39f8e06..bbb8af61ae 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -36,6 +36,7 @@ + #include "yank_functions.h" + #include "tls.h" + #include "qemu/userfaultfd.h" ++#include "qemu/mmap-alloc.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +@@ -336,11 +337,12 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + + /* Callback from postcopy_ram_supported_by_host block iterator. + */ +-static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) ++static int test_ramblock_postcopiable(RAMBlock *rb) + { + const char *block_name = qemu_ram_get_idstr(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); + size_t pagesize = qemu_ram_pagesize(rb); ++ QemuFsType fs; + + if (length % pagesize) { + error_report("Postcopy requires RAM blocks to be a page size multiple," +@@ -348,6 +350,15 @@ static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) + "page size of 0x%zx", block_name, length, pagesize); + return 1; + } ++ ++ if (rb->fd >= 0) { ++ fs = qemu_fd_getfs(rb->fd); ++ if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { ++ error_report("Host backend files need to be TMPFS or HUGETLBFS only"); ++ return 1; ++ } ++ } ++ + return 0; + } + +@@ -366,6 +377,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + struct uffdio_range range_struct; + uint64_t feature_mask; + Error *local_err = NULL; ++ RAMBlock *block; + + if (qemu_target_page_size() > pagesize) { + error_report("Target page size bigger than host page size"); +@@ -390,9 +402,23 @@ bool 
postcopy_ram_supported_by_host(MigrationIncomingState *mis) + goto out; + } + +- /* We don't support postcopy with shared RAM yet */ +- if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) { +- goto out; ++ /* ++ * We don't support postcopy with some type of ramblocks. ++ * ++ * NOTE: we explicitly ignored ramblock_is_ignored() instead we checked ++ * all possible ramblocks. This is because this function can be called ++ * when creating the migration object, during the phase RAM_MIGRATABLE ++ * is not even properly set for all the ramblocks. ++ * ++ * A side effect of this is we'll also check against RAM_SHARED ++ * ramblocks even if migrate_ignore_shared() is set (in which case ++ * we'll never migrate RAM_SHARED at all), but normally this shouldn't ++ * affect in reality, or we can revisit. ++ */ ++ RAMBLOCK_FOREACH(block) { ++ if (test_ramblock_postcopiable(block)) { ++ goto out; ++ } + } + + /* +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch b/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch new file mode 100644 index 0000000..88eb791 --- /dev/null +++ b/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch @@ -0,0 +1,44 @@ +From 93c9a1ae812720d3a29980a3c5fcfc1e916993de Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E7=9A=86=E4=BF=8A?= +Date: Fri, 17 Mar 2023 09:57:13 +0000 +Subject: [PATCH 07/56] migration: remove extra whitespace character for code + style +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [6/50] bc1cd812f8dfc18e47e1644b5333c703eae23d2d (peterx/qemu-kvm) + +Fix code style. 
+ +Signed-off-by: 李皆俊 +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 8ebb6ecc3798e66a9ba98355983762bedfa1b72d) +Signed-off-by: Peter Xu +--- + migration/ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 79d881f735..0e68099bf9 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3293,7 +3293,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + + migration_ops = g_malloc0(sizeof(MigrationOps)); + migration_ops->ram_save_target_page = ram_save_target_page_legacy; +- ret = multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(f); + if (ret < 0) { + return ret; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch b/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch new file mode 100644 index 0000000..52b19b3 --- /dev/null +++ b/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch @@ -0,0 +1,329 @@ +From ee566ec12099992f9134bda1db92dd568427245a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 18:26:59 +0100 +Subject: [PATCH 18/56] migration: rename enabled_capabilities to capabilities +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [17/50] 841a27addf273d8f559bc8ebd2c854200e8ca673 (peterx/qemu-kvm) + +It is clear from the context what that means, and such a long name +with the extra long names of the capabilities make very difficult to +stay inside the 80 columns limit. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 0cec2056ff67557c18d7b8ab1b70ab47c9e31f2f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 52 +++++++++++++++++++++---------------------- + migration/migration.h | 2 +- + migration/rdma.c | 4 ++-- + migration/savevm.c | 6 ++--- + 4 files changed, 31 insertions(+), 33 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f1b3439e5f..d8e5fb6226 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -364,8 +364,7 @@ static bool migrate_late_block_activate(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[ +- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; ++ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + + /* +@@ -944,7 +943,7 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + #endif + caps = g_malloc0(sizeof(*caps)); + caps->capability = i; +- caps->state = s->enabled_capabilities[i]; ++ caps->state = s->capabilities[i]; + QAPI_LIST_APPEND(tail, caps); + } + +@@ -1494,13 +1493,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + return; + } + +- memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); ++ memcpy(cap_list, s->capabilities, sizeof(cap_list)); + if (!migrate_caps_check(cap_list, params, errp)) { + return; + } + + for (cap = params; cap; cap = cap->next) { +- s->enabled_capabilities[cap->value->capability] = cap->value->state; ++ s->capabilities[cap->value->capability] = cap->value->state; + } + } + +@@ -2569,7 +2568,7 @@ bool migrate_release_ram(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; ++ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + + bool migrate_postcopy_ram(void) +@@ -2578,7 +2577,7 @@ bool migrate_postcopy_ram(void) + + s = migrate_get_current(); + +- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } + + bool migrate_postcopy(void) +@@ -2592,7 +2591,7 @@ bool migrate_auto_converge(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; ++ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; + } + + bool migrate_zero_blocks(void) +@@ -2601,7 +2600,7 @@ bool migrate_zero_blocks(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } + + bool migrate_postcopy_blocktime(void) +@@ -2610,7 +2609,7 @@ bool migrate_postcopy_blocktime(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; + } + + bool migrate_use_compression(void) +@@ -2619,7 +2618,7 @@ bool migrate_use_compression(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; ++ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; + } + + int migrate_compress_level(void) +@@ -2664,7 +2663,7 @@ bool migrate_dirty_bitmaps(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; ++ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + + bool migrate_ignore_shared(void) +@@ -2673,7 +2672,7 @@ bool migrate_ignore_shared(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; ++ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; + } + + bool migrate_validate_uuid(void) +@@ -2682,7 +2681,7 @@ bool migrate_validate_uuid(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; ++ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + + bool 
migrate_use_events(void) +@@ -2691,7 +2690,7 @@ bool migrate_use_events(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; ++ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; + } + + bool migrate_use_multifd(void) +@@ -2700,7 +2699,7 @@ bool migrate_use_multifd(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; ++ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + + bool migrate_pause_before_switchover(void) +@@ -2709,8 +2708,7 @@ bool migrate_pause_before_switchover(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[ +- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; ++ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; + } + + int migrate_multifd_channels(void) +@@ -2757,7 +2755,7 @@ bool migrate_use_zero_copy_send(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } + #endif + +@@ -2776,7 +2774,7 @@ int migrate_use_xbzrle(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; ++ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; + } + + uint64_t migrate_xbzrle_cache_size(void) +@@ -2803,7 +2801,7 @@ bool migrate_use_block(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; ++ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; + } + + bool migrate_use_return_path(void) +@@ -2812,7 +2810,7 @@ bool migrate_use_return_path(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; ++ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + + bool migrate_use_block_incremental(void) +@@ -2830,7 +2828,7 @@ bool migrate_background_snapshot(void) + + s = migrate_get_current(); + +- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; ++ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + + bool migrate_postcopy_preempt(void) +@@ -2839,7 +2837,7 @@ bool migrate_postcopy_preempt(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; + } + + /* migration thread support */ +@@ -3584,7 +3582,7 @@ fail: + bool migrate_colo_enabled(void) + { + MigrationState *s = migrate_get_current(); +- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; ++ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + + typedef enum MigThrError { +@@ -4447,7 +4445,7 @@ void migration_global_dump(Monitor *mon) + } + + #define DEFINE_PROP_MIG_CAP(name, x) \ +- DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) ++ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) + + static Property migration_properties[] = { + DEFINE_PROP_BOOL("store-global-state", MigrationState, +@@ -4646,7 +4644,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp) + } + + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (ms->enabled_capabilities[i]) { ++ if (ms->capabilities[i]) { + QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); + } + } +diff --git a/migration/migration.h b/migration/migration.h +index 310ae8901b..04e0860b4e 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -310,7 +310,7 @@ struct MigrationState { + int64_t downtime_start; + int64_t downtime; + int64_t expected_downtime; +- bool enabled_capabilities[MIGRATION_CAPABILITY__MAX]; ++ bool capabilities[MIGRATION_CAPABILITY__MAX]; + int64_t setup_time; + /* + * Whether guest was running when we enter the completion stage. 
+diff --git a/migration/rdma.c b/migration/rdma.c +index df646be35e..f35f021963 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -4179,7 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma, +- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); + + if (ret) { + goto err; +@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma_return_path, +- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); + + if (ret) { + goto return_path_err; +diff --git a/migration/savevm.c b/migration/savevm.c +index aa54a67fda..589ef926ab 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -253,7 +253,7 @@ static uint32_t get_validatable_capabilities_count(void) + uint32_t result = 0; + int i; + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (should_validate_capability(i) && s->enabled_capabilities[i]) { ++ if (should_validate_capability(i) && s->capabilities[i]) { + result++; + } + } +@@ -275,7 +275,7 @@ static int configuration_pre_save(void *opaque) + state->capabilities = g_renew(MigrationCapability, state->capabilities, + state->caps_count); + for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (should_validate_capability(i) && s->enabled_capabilities[i]) { ++ if (should_validate_capability(i) && s->capabilities[i]) { + state->capabilities[j++] = i; + } + } +@@ -325,7 +325,7 @@ static bool configuration_validate_capabilities(SaveState *state) + continue; + } + source_state = test_bit(i, source_caps_bm); +- target_state = s->enabled_capabilities[i]; ++ target_state = s->capabilities[i]; + if (source_state != target_state) { + error_report("Capability %s is %s, but received capability is %s", + MigrationCapability_str(i), +-- +2.39.1 + diff --git 
a/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch b/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch new file mode 100644 index 0000000..0bebd2e --- /dev/null +++ b/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch @@ -0,0 +1,127 @@ +From 2a5ea92ca0a5dffad54e4d06a683f683996cea9a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 21 Jun 2022 12:13:14 +0200 +Subject: [PATCH 05/12] multifd: Create property + multifd-flush-after-each-section +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: quintela1 +RH-MergeRequest: 186: Multifd flushes its channels 10 times per second +RH-Bugzilla: 2196295 +RH-Acked-by: Peter Xu +RH-Acked-by: Leonardo Brás +RH-Commit: [1/3] 5bf5348e8be5b1d1629b859ce1ddb7aa0d72c0d6 (juan.quintela/c9s-qemu-kvm) + +We used to flush all channels at the end of each RAM section +sent. That is not needed, so preparing to only flush after a full +iteration through all the RAM. + +Default value of the property is false. But we return "true" in +migrate_multifd_flush_after_each_section() until we implement the code +in following patches. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +Acked-by: Peter Xu + +--- + +Rename each-iteration to after-each-section +Rename multifd-sync-after-each-section to + multifd-flush-after-each-section +Move to machine-8.0 (peter) + +conflit hw_compat_8_0 and hw_compat_rhel_9_2 + +(cherry picked from commit 77c259a4cb1c9799754b48f570301ebf1de5ded8) +--- + hw/core/machine.c | 2 ++ + migration/migration.h | 12 ++++++++++++ + migration/options.c | 13 +++++++++++++ + migration/options.h | 1 + + 4 files changed, 28 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5abdc8c39b..5ea52317b9 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -54,6 +54,8 @@ const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + + GlobalProperty hw_compat_rhel_9_2[] = { ++ /* hw_compat_rhel_9_2 from hw_compat_8_0 */ ++ { "migration", "multifd-flush-after-each-section", "on"}, + /* hw_compat_rhel_9_2 from hw_compat_7_2 */ + { "e1000e", "migrate-timadj", "off" }, + /* hw_compat_rhel_9_2 from hw_compat_7_2 */ +diff --git a/migration/migration.h b/migration/migration.h +index 7ccf460aa2..04c78c1fd6 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -411,6 +411,18 @@ struct MigrationState { + */ + bool preempt_pre_7_2; + ++ /* ++ * flush every channel after each section sent. ++ * ++ * This assures that we can't mix pages from one iteration through ++ * ram pages with pages for the following iteration. We really ++ * only need to do this flush after we have go through all the ++ * dirty pages. For historical reasons, we do that after each ++ * section. This is suboptimal (we flush too many times). ++ * Default value is false. Setting this property has no effect ++ * until the patch that removes this comment. (since 8.1) ++ */ ++ bool multifd_flush_after_each_section; + /* + * This decides the size of guest memory chunk that will be used + * to track dirty bitmap clearing. 
The size of memory chunk will +diff --git a/migration/options.c b/migration/options.c +index ccd7ef3907..5b0d080ecb 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -88,6 +88,8 @@ Property migration_properties[] = { + send_section_footer, true), + DEFINE_PROP_BOOL("decompress-error-check", MigrationState, + decompress_error_check, true), ++ DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, ++ multifd_flush_after_each_section, true), + DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, + clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), + DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, +@@ -344,6 +346,17 @@ bool migrate_zero_copy_send(void) + + /* pseudo capabilities */ + ++bool migrate_multifd_flush_after_each_section(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ /* ++ * Until the patch that remove this comment, we always return that ++ * the property is enabled. ++ */ ++ return true || s->multifd_flush_after_each_section; ++} ++ + bool migrate_postcopy(void) + { + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); +diff --git a/migration/options.h b/migration/options.h +index 0fc7be6869..271f49ae5f 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -60,6 +60,7 @@ bool migrate_zero_copy_send(void); + * check, but they are not a capability. 
+ */ + ++bool migrate_multifd_flush_after_each_section(void); + bool migrate_postcopy(void); + bool migrate_tls(void); + +-- +2.39.3 + diff --git a/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch b/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch new file mode 100644 index 0000000..abf21e6 --- /dev/null +++ b/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch @@ -0,0 +1,58 @@ +From af6f2a543c7db6d67d33fd12615a50e57fc3fe66 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 26 Apr 2023 12:20:36 +0200 +Subject: [PATCH 19/21] multifd: Fix the number of channels ready +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 171: multifd: Fix the number of channels ready +RH-Bugzilla: 2196289 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] a5e271ba249d85b27a68d3cff10480ca3a112c5d (LeoBras/centos-qemu-kvm) + +We don't wait in the sem when we are doing a sync_main. Make it wait +there. To make things clearer, we mark the channel ready at the +beginning of the thread loop. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit d2026ee117147893f8d80f060cede6d872ecbd7f) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cce3ad6988..6a59c03dd2 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -635,6 +635,7 @@ int multifd_send_sync_main(QEMUFile *f) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + ++ qemu_sem_wait(&multifd_send_state->channels_ready); + trace_multifd_send_sync_main_wait(p->id); + qemu_sem_wait(&p->sem_sync); + +@@ -668,6 +669,7 @@ static void *multifd_send_thread(void *opaque) + p->num_packets = 1; + + while (true) { ++ qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_wait(&p->sem); + + if (qatomic_read(&multifd_send_state->exiting)) { +@@ -736,7 +738,6 @@ static void *multifd_send_thread(void *opaque) + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&p->sem_sync); + } +- qemu_sem_post(&multifd_send_state->channels_ready); + } else if (p->quit) { + qemu_mutex_unlock(&p->mutex); + break; +-- +2.39.3 + diff --git a/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch b/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch new file mode 100644 index 0000000..3f76384 --- /dev/null +++ b/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch @@ -0,0 +1,166 @@ +From e6f770506091eada46c63ac1c8b934b508e3807f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 21 Jun 2022 13:36:11 +0200 +Subject: [PATCH 07/12] multifd: Only flush once each full round of memory +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: quintela1 +RH-MergeRequest: 186: Multifd flushes its channels 10 times per second +RH-Bugzilla: 2196295 +RH-Acked-by: Peter Xu +RH-Acked-by: Leonardo Brás +RH-Commit: 
[3/3] 33f76dfc72a2552a42dc7f0fe3923564185a7bf7 (juan.quintela/c9s-qemu-kvm) + +We need to add a new flag to mean to flush at that point. +Notice that we still flush at the end of setup and at the end of +complete stages. + +Signed-off-by: Juan Quintela +Acked-by: Peter Xu + +--- + +Add missing qemu_fflush(), now it passes all tests always. +In the previous version, the check that changes the default value to +false got lost in some rebase. Get it back. + +(cherry picked from commit 294e5a4034e81b3d8db03b4e0f691386f20d6ed3) +--- + migration/migration.h | 3 +-- + migration/options.c | 8 ++------ + migration/ram.c | 28 +++++++++++++++++++++++++++- + 3 files changed, 30 insertions(+), 9 deletions(-) + +diff --git a/migration/migration.h b/migration/migration.h +index 04c78c1fd6..dfec649af8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -419,8 +419,7 @@ struct MigrationState { + * only need to do this flush after we have go through all the + * dirty pages. For historical reasons, we do that after each + * section. This is suboptimal (we flush too many times). +- * Default value is false. Setting this property has no effect +- * until the patch that removes this comment. (since 8.1) ++ * Default value is false. 
(since 8.1) + */ + bool multifd_flush_after_each_section; + /* +diff --git a/migration/options.c b/migration/options.c +index 5b0d080ecb..e13c7cb8e5 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -89,7 +89,7 @@ Property migration_properties[] = { + DEFINE_PROP_BOOL("decompress-error-check", MigrationState, + decompress_error_check, true), + DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, +- multifd_flush_after_each_section, true), ++ multifd_flush_after_each_section, false), + DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, + clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), + DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, +@@ -350,11 +350,7 @@ bool migrate_multifd_flush_after_each_section(void) + { + MigrationState *s = migrate_get_current(); + +- /* +- * Until the patch that remove this comment, we always return that +- * the property is enabled. +- */ +- return true || s->multifd_flush_after_each_section; ++ return s->multifd_flush_after_each_section; + } + + bool migrate_postcopy(void) +diff --git a/migration/ram.c b/migration/ram.c +index 1e2414d681..e9dcda8b9d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -86,6 +86,7 @@ + #define RAM_SAVE_FLAG_XBZRLE 0x40 + /* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */ + #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 ++#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 + /* We can't use any flag that is bigger than 0x200 */ + + int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, +@@ -1581,6 +1582,7 @@ retry: + * associated with the search process. 
+ * + * Returns: ++ * <0: An error happened + * PAGE_ALL_CLEAN: no dirty page found, give up + * PAGE_TRY_AGAIN: no dirty page found, retry for next block + * PAGE_DIRTY_FOUND: dirty page found +@@ -1608,6 +1610,15 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) + pss->page = 0; + pss->block = QLIST_NEXT_RCU(pss->block, next); + if (!pss->block) { ++ if (!migrate_multifd_flush_after_each_section()) { ++ QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; ++ int ret = multifd_send_sync_main(f); ++ if (ret < 0) { ++ return ret; ++ } ++ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); ++ qemu_fflush(f); ++ } + /* + * If memory migration starts over, we will meet a dirtied page + * which may still exists in compression threads's ring, so we +@@ -2600,6 +2611,9 @@ static int ram_find_and_save_block(RAMState *rs) + break; + } else if (res == PAGE_TRY_AGAIN) { + continue; ++ } else if (res < 0) { ++ pages = res; ++ break; + } + } + } +@@ -3286,6 +3300,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + return ret; + } + ++ if (!migrate_multifd_flush_after_each_section()) { ++ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3471,6 +3489,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + return ret; + } + ++ if (!migrate_multifd_flush_after_each_section()) { ++ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); ++ } + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -4152,7 +4173,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) + } + decompress_data_with_multi_threads(f, page_buffer, len); + break; +- ++ case RAM_SAVE_FLAG_MULTIFD_FLUSH: ++ multifd_recv_sync_main(); ++ break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ + if (migrate_multifd_flush_after_each_section()) { +@@ -4426,6 +4449,9 @@ static int ram_load_precopy(QEMUFile *f) + break; + } + break; ++ case RAM_SAVE_FLAG_MULTIFD_FLUSH: ++ multifd_recv_sync_main(); ++ break; + case 
RAM_SAVE_FLAG_EOS: + /* normal exit */ + if (migrate_multifd_flush_after_each_section()) { +-- +2.39.3 + diff --git a/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch b/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch new file mode 100644 index 0000000..779841f --- /dev/null +++ b/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch @@ -0,0 +1,78 @@ +From c4bfb4900b95e13bef2d86b83c33786c7c4f6289 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 21 Jun 2022 12:21:32 +0200 +Subject: [PATCH 06/12] multifd: Protect multifd_send_sync_main() calls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: quintela1 +RH-MergeRequest: 186: Multifd flushes its channels 10 times per second +RH-Bugzilla: 2196295 +RH-Acked-by: Peter Xu +RH-Acked-by: Leonardo Brás +RH-Commit: [2/3] a91adf59c6b2f39bf4a308f566b00e39cae6e0ae (juan.quintela/c9s-qemu-kvm) + +We only need to do that on the ram_save_iterate() call on sending and +on destination when we get a RAM_SAVE_FLAG_EOS. + +In setup() and complete() we need to synch in both new and old cases, +so don't add a check there. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +Acked-by: Peter Xu + +--- + +Remove the wrappers that we take out on patch 5. 
+ +(cherry picked from commit b05292c237030343516d073b1a1e5f49ffc017a8) +--- + migration/ram.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 01356f60a4..1e2414d681 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3394,9 +3394,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { +- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); +- if (ret < 0) { +- return ret; ++ if (migrate_multifd_flush_after_each_section()) { ++ ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); ++ if (ret < 0) { ++ return ret; ++ } + } + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +@@ -4153,7 +4155,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) + + case RAM_SAVE_FLAG_EOS: + /* normal exit */ +- multifd_recv_sync_main(); ++ if (migrate_multifd_flush_after_each_section()) { ++ multifd_recv_sync_main(); ++ } + break; + default: + error_report("Unknown combination of migration flags: 0x%x" +@@ -4424,7 +4428,9 @@ static int ram_load_precopy(QEMUFile *f) + break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ +- multifd_recv_sync_main(); ++ if (migrate_multifd_flush_after_each_section()) { ++ multifd_recv_sync_main(); ++ } + break; + default: + if (flags & RAM_SAVE_FLAG_HOOK) { +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch b/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch new file mode 100644 index 0000000..214b6dd --- /dev/null +++ b/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch @@ -0,0 +1,159 @@ +From 639f65d2cd4c6627a1d22c4b418b41400fe40154 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:33 +0200 +Subject: [PATCH 03/21] nbd/server: Fix drained_poll to wake coroutine in right + AioContext + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: 
block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 177092e61360c2feb04377890b32fdeb2d1cfefc (kmwolf/centos-qemu-kvm) + +nbd_drained_poll() generally runs in the main thread, not whatever +iothread the NBD server coroutine is meant to run in, so it can't +directly reenter the coroutines to wake them up. + +The code seems to have the right intention, it specifies the correct +AioContext when it calls qemu_aio_coroutine_enter(). However, this +functions doesn't schedule the coroutine to run in that AioContext, but +it assumes it is already called in the home thread of the AioContext. + +To fix this, add a new thread-safe qio_channel_wake_read() that can be +called in the main thread to wake up the coroutine in its AioContext, +and use this in nbd_drained_poll(). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-3-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 7c1f51bf38de8cea4ed5030467646c37b46edeb7) +Signed-off-by: Kevin Wolf +--- + include/io/channel.h | 10 ++++++++++ + io/channel.c | 33 +++++++++++++++++++++++++++------ + nbd/server.c | 3 +-- + 3 files changed, 38 insertions(+), 8 deletions(-) + +diff --git a/include/io/channel.h b/include/io/channel.h +index 153fbd2904..2b905423a9 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc); + void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition); + ++/** ++ * qio_channel_wake_read: ++ * @ioc: the channel object ++ * ++ * If qio_channel_yield() is currently waiting for the channel to become ++ * readable, interrupt it and reenter immediately. This function is safe to call ++ * from any thread. 
++ */ ++void qio_channel_wake_read(QIOChannel *ioc); ++ + /** + * qio_channel_wait: + * @ioc: the channel object +diff --git a/io/channel.c b/io/channel.c +index a8c7f11649..3c9b7beb65 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include "block/aio-wait.h" + #include "io/channel.h" + #include "qapi/error.h" + #include "qemu/main-loop.h" +@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, + static void qio_channel_restart_read(void *opaque) + { + QIOChannel *ioc = opaque; +- Coroutine *co = ioc->read_coroutine; ++ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); ++ ++ if (!co) { ++ return; ++ } + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == +@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) + static void qio_channel_restart_write(void *opaque) + { + QIOChannel *ioc = opaque; +- Coroutine *co = ioc->write_coroutine; ++ Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); ++ ++ if (!co) { ++ return; ++ } + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == +@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) + void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition) + { ++ AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); ++ + assert(qemu_in_coroutine()); ++ assert(in_aio_context_home_thread(ioc_ctx)); ++ + if (condition == G_IO_IN) { + assert(!ioc->read_coroutine); + ioc->read_coroutine = qemu_coroutine_self(); +@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, + } + qio_channel_set_aio_fd_handlers(ioc); + qemu_coroutine_yield(); ++ assert(in_aio_context_home_thread(ioc_ctx)); + + /* Allow interrupting the operation by reentering the coroutine other than + * through the aio_fd_handlers. 
*/ +- if (condition == G_IO_IN && ioc->read_coroutine) { +- ioc->read_coroutine = NULL; ++ if (condition == G_IO_IN) { ++ assert(ioc->read_coroutine == NULL); + qio_channel_set_aio_fd_handlers(ioc); +- } else if (condition == G_IO_OUT && ioc->write_coroutine) { +- ioc->write_coroutine = NULL; ++ } else if (condition == G_IO_OUT) { ++ assert(ioc->write_coroutine == NULL); + qio_channel_set_aio_fd_handlers(ioc); + } + } + ++void qio_channel_wake_read(QIOChannel *ioc) ++{ ++ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); ++ if (co) { ++ aio_co_wake(co); ++ } ++} + + static gboolean qio_channel_wait_complete(QIOChannel *ioc, + GIOCondition condition, +diff --git a/nbd/server.c b/nbd/server.c +index 3d8d0d81df..ea47522e8f 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) + * enter it here so we don't depend on the client to wake it up. + */ + if (client->recv_coroutine != NULL && client->read_yielding) { +- qemu_aio_coroutine_enter(exp->common.ctx, +- client->recv_coroutine); ++ qio_channel_wake_read(client->ioc); + } + + return true; +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch b/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch new file mode 100644 index 0000000..20b9c04 --- /dev/null +++ b/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch @@ -0,0 +1,78 @@ +From d6b3f9e4b388b8d621761104ddf075d6087f6d6c Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 9 Jun 2023 09:27:47 +0200 +Subject: [PATCH 09/12] net: socket: move fd type checking to its own function + +RH-Author: Laurent Vivier +RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket +RH-Jira: RHEL-582 +RH-Acked-by: Stefano Brivio +RH-Acked-by: Jason Wang +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [2/3] 9726f0ae81ac209b5db33dc7767f652867d8ca0a (lvivier/qemu-kvm-centos) + +JIRA: 
https://issues.redhat.com/browse/RHEL-582 + +Reviewed-by: David Gibson +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit 23455ae341656ca867ee4a171826b9d280d6acb5) +--- + net/socket.c | 28 ++++++++++++++++++++-------- + 1 file changed, 20 insertions(+), 8 deletions(-) + +diff --git a/net/socket.c b/net/socket.c +index 24dcaa55bc..6b1f0fec3a 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -446,16 +446,32 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, + return s; + } + ++static int net_socket_fd_check(int fd, Error **errp) ++{ ++ int so_type, optlen = sizeof(so_type); ++ ++ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, ++ (socklen_t *)&optlen) < 0) { ++ error_setg(errp, "can't get socket option SO_TYPE"); ++ return -1; ++ } ++ if (so_type != SOCK_DGRAM && so_type != SOCK_STREAM) { ++ error_setg(errp, "socket type=%d for fd=%d must be either" ++ " SOCK_DGRAM or SOCK_STREAM", so_type, fd); ++ return -1; ++ } ++ return so_type; ++} ++ + static NetSocketState *net_socket_fd_init(NetClientState *peer, + const char *model, const char *name, + int fd, int is_connected, + const char *mc, Error **errp) + { +- int so_type = -1, optlen=sizeof(so_type); ++ int so_type; + +- if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, +- (socklen_t *)&optlen)< 0) { +- error_setg(errp, "can't get socket option SO_TYPE"); ++ so_type = net_socket_fd_check(fd, errp); ++ if (so_type < 0) { + close(fd); + return NULL; + } +@@ -465,10 +481,6 @@ static NetSocketState *net_socket_fd_init(NetClientState *peer, + mc, errp); + case SOCK_STREAM: + return net_socket_fd_init_stream(peer, model, name, fd, is_connected); +- default: +- error_setg(errp, "socket type=%d for fd=%d must be either" +- " SOCK_DGRAM or SOCK_STREAM", so_type, fd); +- close(fd); + } + return NULL; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch 
b/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch new file mode 100644 index 0000000..269da29 --- /dev/null +++ b/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch @@ -0,0 +1,60 @@ +From a467540e49e76c5961d86e3f47d3f8fcad8cef09 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 9 Jun 2023 09:27:46 +0200 +Subject: [PATCH 08/12] net: socket: prepare to cleanup net_init_socket() + +RH-Author: Laurent Vivier +RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket +RH-Jira: RHEL-582 +RH-Acked-by: Stefano Brivio +RH-Acked-by: Jason Wang +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [1/3] 3e4f8370586ae1ac2474fef971a239edb31eeb67 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-582 + +Use directly net_socket_fd_init_stream() and net_socket_fd_init_dgram() +when the socket type is already known. + +Reviewed-by: David Gibson +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit 006c3fa74c3edb978ff46d2851699e9a95609da5) +--- + net/socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/socket.c b/net/socket.c +index ba6e5b0b00..24dcaa55bc 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -587,7 +587,7 @@ static int net_socket_connect_init(NetClientState *peer, + break; + } + } +- s = net_socket_fd_init(peer, model, name, fd, connected, NULL, errp); ++ s = net_socket_fd_init_stream(peer, model, name, fd, connected); + if (!s) { + return -1; + } +@@ -629,7 +629,7 @@ static int net_socket_mcast_init(NetClientState *peer, + return -1; + } + +- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); ++ s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); + if (!s) { + return -1; + } +@@ -683,7 +683,7 @@ static int net_socket_udp_init(NetClientState *peer, + } + qemu_socket_set_nonblock(fd); + +- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); ++ s = net_socket_fd_init_dgram(peer, model, 
name, fd, 0, NULL, errp); + if (!s) { + return -1; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-remove-net_init_socket.patch b/SOURCES/kvm-net-socket-remove-net_init_socket.patch new file mode 100644 index 0000000..98c96f2 --- /dev/null +++ b/SOURCES/kvm-net-socket-remove-net_init_socket.patch @@ -0,0 +1,102 @@ +From ecb4f97895849c562112b76a30ddc2037e8df79e Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 9 Jun 2023 09:27:48 +0200 +Subject: [PATCH 10/12] net: socket: remove net_init_socket() + +RH-Author: Laurent Vivier +RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket +RH-Jira: RHEL-582 +RH-Acked-by: Stefano Brivio +RH-Acked-by: Jason Wang +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [3/3] e1d7939f5df4a77c2fff62d1ae4899a7a3615ad9 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-582 + +Move the file descriptor type checking before doing anything with it. +If it's not usable, don't close it as it could be in use by another +part of QEMU, only fail and report an error. 
+ +Reviewed-by: David Gibson +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit b6aeee02980e193f744f74c48fd900940feb2799) +--- + net/socket.c | 43 +++++++++++++++++-------------------------- + 1 file changed, 17 insertions(+), 26 deletions(-) + +diff --git a/net/socket.c b/net/socket.c +index 6b1f0fec3a..8e3702e1f3 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -463,28 +463,6 @@ static int net_socket_fd_check(int fd, Error **errp) + return so_type; + } + +-static NetSocketState *net_socket_fd_init(NetClientState *peer, +- const char *model, const char *name, +- int fd, int is_connected, +- const char *mc, Error **errp) +-{ +- int so_type; +- +- so_type = net_socket_fd_check(fd, errp); +- if (so_type < 0) { +- close(fd); +- return NULL; +- } +- switch(so_type) { +- case SOCK_DGRAM: +- return net_socket_fd_init_dgram(peer, model, name, fd, is_connected, +- mc, errp); +- case SOCK_STREAM: +- return net_socket_fd_init_stream(peer, model, name, fd, is_connected); +- } +- return NULL; +-} +- + static void net_socket_accept(void *opaque) + { + NetSocketState *s = opaque; +@@ -728,21 +706,34 @@ int net_init_socket(const Netdev *netdev, const char *name, + } + + if (sock->fd) { +- int fd, ret; ++ int fd, ret, so_type; + + fd = monitor_fd_param(monitor_cur(), sock->fd, errp); + if (fd == -1) { + return -1; + } ++ so_type = net_socket_fd_check(fd, errp); ++ if (so_type < 0) { ++ return -1; ++ } + ret = qemu_socket_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } +- if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast, +- errp)) { +- return -1; ++ switch (so_type) { ++ case SOCK_DGRAM: ++ if (!net_socket_fd_init_dgram(peer, "socket", name, fd, 1, ++ sock->mcast, errp)) { ++ return -1; ++ } ++ break; ++ case SOCK_STREAM: ++ if (!net_socket_fd_init_stream(peer, "socket", name, fd, 1)) { ++ return -1; ++ } ++ break; + } + return 0; + } +-- 
+2.39.3 + diff --git a/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch b/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch deleted file mode 100644 index 707c80f..0000000 --- a/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch +++ /dev/null @@ -1,325 +0,0 @@ -From e5834364958a3914d7b8b46b985a1b054728b466 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 19 Jan 2023 11:16:45 +0100 -Subject: [PATCH 2/8] net: stream: add a new option to automatically reconnect -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect -RH-Bugzilla: 2169232 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Cindy Lu -RH-Acked-by: MST -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [2/2] 9b87647a9ed2e7c1b91bdfa9d0a736e091c892a5 (lvivier/qemu-kvm-centos) - -In stream mode, if the server shuts down there is currently -no way to reconnect the client to a new server without removing -the NIC device and the netdev backend (or to reboot). - -This patch introduces a reconnect option that specifies a delay -to try to reconnect with the same parameters. - -Add a new test in qtest to test the reconnect option and the -connect/disconnect events. 
- -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit b95c0d4440950fba6dbef0f781962911fa42abdb) ---- - net/stream.c | 53 ++++++++++++++++++- - qapi/net.json | 7 ++- - qemu-options.hx | 6 +-- - tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++ - 4 files changed, 162 insertions(+), 5 deletions(-) - -diff --git a/net/stream.c b/net/stream.c -index 37ff727e0c..9204b4c96e 100644 ---- a/net/stream.c -+++ b/net/stream.c -@@ -39,6 +39,8 @@ - #include "io/channel-socket.h" - #include "io/net-listener.h" - #include "qapi/qapi-events-net.h" -+#include "qapi/qapi-visit-sockets.h" -+#include "qapi/clone-visitor.h" - - typedef struct NetStreamState { - NetClientState nc; -@@ -49,11 +51,15 @@ typedef struct NetStreamState { - guint ioc_write_tag; - SocketReadState rs; - unsigned int send_index; /* number of bytes sent*/ -+ uint32_t reconnect; -+ guint timer_tag; -+ SocketAddress *addr; - } NetStreamState; - - static void net_stream_listen(QIONetListener *listener, - QIOChannelSocket *cioc, - void *opaque); -+static void net_stream_arm_reconnect(NetStreamState *s); - - static gboolean net_stream_writable(QIOChannel *ioc, - GIOCondition condition, -@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc, - qemu_set_info_str(&s->nc, "%s", ""); - - qapi_event_send_netdev_stream_disconnected(s->nc.name); -+ net_stream_arm_reconnect(s); - - return G_SOURCE_REMOVE; - } -@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc, - static void net_stream_cleanup(NetClientState *nc) - { - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); -+ if (s->timer_tag) { -+ g_source_remove(s->timer_tag); -+ s->timer_tag = 0; -+ } -+ if (s->addr) { -+ qapi_free_SocketAddress(s->addr); -+ s->addr = NULL; -+ } - if (s->ioc) { - if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { - if (s->ioc_read_tag) { -@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque) - error: - 
object_unref(OBJECT(s->ioc)); - s->ioc = NULL; -+ net_stream_arm_reconnect(s); -+} -+ -+static gboolean net_stream_reconnect(gpointer data) -+{ -+ NetStreamState *s = data; -+ QIOChannelSocket *sioc; -+ -+ s->timer_tag = 0; -+ -+ sioc = qio_channel_socket_new(); -+ s->ioc = QIO_CHANNEL(sioc); -+ qio_channel_socket_connect_async(sioc, s->addr, -+ net_stream_client_connected, s, -+ NULL, NULL); -+ return G_SOURCE_REMOVE; -+} -+ -+static void net_stream_arm_reconnect(NetStreamState *s) -+{ -+ if (s->reconnect && s->timer_tag == 0) { -+ s->timer_tag = g_timeout_add_seconds(s->reconnect, -+ net_stream_reconnect, s); -+ } - } - - static int net_stream_client_init(NetClientState *peer, - const char *model, - const char *name, - SocketAddress *addr, -+ uint32_t reconnect, - Error **errp) - { - NetStreamState *s; -@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer, - s->ioc = QIO_CHANNEL(sioc); - s->nc.link_down = true; - -+ s->reconnect = reconnect; -+ if (reconnect) { -+ s->addr = QAPI_CLONE(SocketAddress, addr); -+ } - qio_channel_socket_connect_async(sioc, addr, - net_stream_client_connected, s, - NULL, NULL); -@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name, - sock = &netdev->u.stream; - - if (!sock->has_server || !sock->server) { -- return net_stream_client_init(peer, "stream", name, sock->addr, errp); -+ return net_stream_client_init(peer, "stream", name, sock->addr, -+ sock->has_reconnect ? 
sock->reconnect : 0, -+ errp); -+ } -+ if (sock->has_reconnect) { -+ error_setg(errp, "'reconnect' option is incompatible with " -+ "socket in server mode"); -+ return -1; - } - return net_stream_server_init(peer, "stream", name, sock->addr, errp); - } -diff --git a/qapi/net.json b/qapi/net.json -index 522ac582ed..d6eb30008b 100644 ---- a/qapi/net.json -+++ b/qapi/net.json -@@ -585,6 +585,10 @@ - # @addr: socket address to listen on (server=true) - # or connect to (server=false) - # @server: create server socket (default: false) -+# @reconnect: For a client socket, if a socket is disconnected, -+# then attempt a reconnect after the given number of seconds. -+# Setting this to zero disables this function. (default: 0) -+# (since 8.0) - # - # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. - # -@@ -593,7 +597,8 @@ - { 'struct': 'NetdevStreamOptions', - 'data': { - 'addr': 'SocketAddress', -- '*server': 'bool' } } -+ '*server': 'bool', -+ '*reconnect': 'uint32' } } - - ## - # @NetdevDgramOptions: -diff --git a/qemu-options.hx b/qemu-options.hx -index ea02ca3a45..48eef4aa2c 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -2766,9 +2766,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, - "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" - " configure a network backend to connect to another network\n" - " using an UDP tunnel\n" -- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" -- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" -- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" -+ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" -+ "-netdev 
stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" -+ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" - " configure a network backend to connect to another network\n" - " using a socket connection in stream mode.\n" - "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" -diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c -index 6ba256e173..acc32c378b 100644 ---- a/tests/qtest/netdev-socket.c -+++ b/tests/qtest/netdev-socket.c -@@ -11,6 +11,10 @@ - #include - #include "../unit/socket-helpers.h" - #include "libqtest.h" -+#include "qapi/qmp/qstring.h" -+#include "qemu/sockets.h" -+#include "qapi/qobject-input-visitor.h" -+#include "qapi/qapi-visit-sockets.h" - - #define CONNECTION_TIMEOUT 5 - -@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void) - qtest_quit(qts0); - } - -+static void wait_stream_connected(QTestState *qts, const char *id, -+ SocketAddress **addr) -+{ -+ QDict *resp, *data; -+ QString *qstr; -+ QObject *obj; -+ Visitor *v = NULL; -+ -+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED"); -+ g_assert_nonnull(resp); -+ data = qdict_get_qdict(resp, "data"); -+ g_assert_nonnull(data); -+ -+ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); -+ g_assert_nonnull(data); -+ -+ g_assert(!strcmp(qstring_get_str(qstr), id)); -+ -+ obj = qdict_get(data, "addr"); -+ -+ v = qobject_input_visitor_new(obj); -+ visit_type_SocketAddress(v, NULL, addr, NULL); -+ visit_free(v); -+ qobject_unref(resp); -+} -+ -+static void wait_stream_disconnected(QTestState *qts, const char *id) -+{ -+ QDict *resp, *data; -+ QString *qstr; -+ -+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED"); -+ g_assert_nonnull(resp); -+ data = qdict_get_qdict(resp, "data"); -+ g_assert_nonnull(data); -+ -+ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); -+ 
g_assert_nonnull(data); -+ -+ g_assert(!strcmp(qstring_get_str(qstr), id)); -+ qobject_unref(resp); -+} -+ -+static void test_stream_inet_reconnect(void) -+{ -+ QTestState *qts0, *qts1; -+ int port; -+ SocketAddress *addr; -+ -+ port = inet_get_free_port(false); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off,reconnect=1," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ wait_stream_connected(qts0, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ qapi_free_SocketAddress(addr); -+ -+ /* kill server */ -+ qtest_quit(qts0); -+ -+ /* check client has been disconnected */ -+ wait_stream_disconnected(qts1, "st0"); -+ -+ /* restart server */ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ /* wait connection events*/ -+ wait_stream_connected(qts0, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ qapi_free_SocketAddress(addr); -+ -+ wait_stream_connected(qts1, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ g_assert_cmpint(atoi(addr->u.inet.port), ==, port); -+ qapi_free_SocketAddress(addr); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ - static void test_stream_inet_ipv6(void) - { - QTestState *qts0, *qts1; -@@ -418,6 +517,8 @@ int main(int argc, char **argv) - #ifndef _WIN32 - qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); - #endif -+ 
qtest_add_func("/netdev/stream/inet/reconnect", -+ test_stream_inet_reconnect); - } - if (has_ipv6) { - qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); --- -2.31.1 - diff --git a/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch b/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch new file mode 100644 index 0000000..66d68f1 --- /dev/null +++ b/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch @@ -0,0 +1,145 @@ +From 760a2f284f6d4cd3cd3b1685411bbca21c4ad233 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 1/6] numa: Validate cluster and NUMA node boundary if required +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [1/3] 24580064b9a0076ec4d9a916839d85135ac48cd9 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 + +For some architectures like ARM64, multiple CPUs in one cluster can be +associated with different NUMA nodes, which is irregular configuration +because we shouldn't have this in baremetal environment. The irregular +configuration causes Linux guest to misbehave, as the following warning +messages indicate. 
+ + -smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \ + -numa node,nodeid=0,cpus=0-1,memdev=ram0 \ + -numa node,nodeid=1,cpus=2-3,memdev=ram1 \ + -numa node,nodeid=2,cpus=4-5,memdev=ram2 \ + + ------------[ cut here ]------------ + WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910 + Modules linked in: + CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1 + pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : build_sched_domains+0x284/0x910 + lr : build_sched_domains+0x184/0x910 + sp : ffff80000804bd50 + x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000 + x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840 + x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508 + x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014 + x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e + x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0 + x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041 + x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001 + x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002 + x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001 + Call trace: + build_sched_domains+0x284/0x910 + sched_init_domains+0xac/0xe0 + sched_init_smp+0x48/0xc8 + kernel_init_freeable+0x140/0x1ac + kernel_init+0x28/0x140 + ret_from_fork+0x10/0x20 + +Improve the situation to warn when multiple CPUs in one cluster have +been associated with different NUMA nodes. However, one NUMA node is +allowed to be associated with different clusters. 
+ +Signed-off-by: Gavin Shan +Acked-by: Philippe Mathieu-Daudé +Acked-by: Igor Mammedov +Message-Id: <20230509002739.18388-2-gshan@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a494fdb715832000ee9047a549a35aacfea8175e) +Signed-off-by: Gavin Shan +--- + hw/core/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + include/hw/boards.h | 1 + + 2 files changed, 43 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index c28702b690..5abdc8c39b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1496,6 +1496,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine) + g_string_free(s, true); + } + ++static void validate_cpu_cluster_to_numa_boundary(MachineState *ms) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(ms); ++ NumaState *state = ms->numa_state; ++ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); ++ const CPUArchId *cpus = possible_cpus->cpus; ++ int i, j; ++ ++ if (state->num_nodes <= 1 || possible_cpus->len <= 1) { ++ return; ++ } ++ ++ /* ++ * The Linux scheduling domain can't be parsed when the multiple CPUs ++ * in one cluster have been associated with different NUMA nodes. However, ++ * it's fine to associate one NUMA node with CPUs in different clusters. ++ */ ++ for (i = 0; i < possible_cpus->len; i++) { ++ for (j = i + 1; j < possible_cpus->len; j++) { ++ if (cpus[i].props.has_socket_id && ++ cpus[i].props.has_cluster_id && ++ cpus[i].props.has_node_id && ++ cpus[j].props.has_socket_id && ++ cpus[j].props.has_cluster_id && ++ cpus[j].props.has_node_id && ++ cpus[i].props.socket_id == cpus[j].props.socket_id && ++ cpus[i].props.cluster_id == cpus[j].props.cluster_id && ++ cpus[i].props.node_id != cpus[j].props.node_id) { ++ warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64 ++ " have been associated with node-%" PRId64 " and node-%" PRId64 ++ " respectively. 
It can cause OSes like Linux to" ++ " misbehave", i, j, cpus[i].props.socket_id, ++ cpus[i].props.cluster_id, cpus[i].props.node_id, ++ cpus[j].props.node_id); ++ } ++ } ++ } ++} ++ + MemoryRegion *machine_consume_memdev(MachineState *machine, + HostMemoryBackend *backend) + { +@@ -1581,6 +1620,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * + numa_complete_configuration(machine); + if (machine->numa_state->num_nodes) { + machine_numa_finish_cpu_init(machine); ++ if (machine_class->cpu_cluster_has_numa_boundary) { ++ validate_cpu_cluster_to_numa_boundary(machine); ++ } + } + } + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 5f08bd7550..3628671228 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -275,6 +275,7 @@ struct MachineClass { + bool nvdimm_supported; + bool numa_mem_supported; + bool auto_enable_numa; ++ bool cpu_cluster_has_numa_boundary; + SMPCompatProps smp_props; + const char *default_ram_id; + +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch b/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch new file mode 100644 index 0000000..312af68 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch @@ -0,0 +1,78 @@ +From 7495a51c586818925470fb247882f5ba0f7b0ffd Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 27 Jun 2023 09:47:03 +0200 +Subject: [PATCH 34/37] pc-bios/s390-ccw: Don't use __bss_start with the "larl" + instruction +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] 2483a50c0ed37fa29db649ec44220ac83c215698 (thuth/qemu-kvm-cs9) + +start.S currently cannot be compiled with Clang 16 and 
binutils 2.40: + + ld: start.o(.text+0x8): misaligned symbol `__bss_start' (0xc1e5) for + relocation R_390_PC32DBL + +According to the built-in linker script of ld, the symbol __bss_start +can actually point *before* the .bss section and does not need to have +any alignment, so in certain situations (like when using the internal +assembler of Clang), the __bss_start symbol can indeed be unaligned +and thus it is not suitable for being used with the "larl" instruction +that needs an address that is at least aligned to halfwords. +The problem went unnoticed so far since binutils <= 2.39 did not +check the alignment, but starting with binutils 2.40, such unaligned +addresses are now refused. + +Fix it by loading the address indirectly instead. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2216662 +Reported-by: Miroslav Rezanina +Suggested-by: Andreas Krebbel +Message-Id: <20230629104821.194859-8-thuth@redhat.com> +Reviewed-by: Claudio Imbrenda +Signed-off-by: Thomas Huth +(cherry picked from commit 7cd50cbe4ca3e2860b31b06ec92c17c54bd82d48) +--- + pc-bios/s390-ccw/start.S | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S +index abd6fe6639..22c1c296df 100644 +--- a/pc-bios/s390-ccw/start.S ++++ b/pc-bios/s390-ccw/start.S +@@ -19,7 +19,8 @@ _start: + larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ + + /* clear bss */ +- larl %r2,__bss_start ++ larl %r2,bss_start_literal /* __bss_start might be unaligned ... */ ++ lg %r2,0(%r2) /* ... so load it indirectly */ + larl %r3,_end + slgr %r3,%r2 /* get sizeof bss */ + ltgr %r3,%r3 /* bss empty? 
*/ +@@ -45,7 +46,6 @@ done: + memsetxc: + xc 0(1,%r1),0(%r1) + +- + /* + * void disabled_wait(void) + * +@@ -113,6 +113,8 @@ io_new_code: + br %r14 + + .align 8 ++bss_start_literal: ++ .quad __bss_start + disabled_wait_psw: + .quad 0x0002000180000000,0x0000000000000000 + enabled_wait_psw: +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch b/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch new file mode 100644 index 0000000..bd13187 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch @@ -0,0 +1,218 @@ +From 24bc8fc932ae1c88cc2e97f0f90786a7be411bb2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 27 Jun 2023 09:47:00 +0200 +Subject: [PATCH 32/37] pc-bios/s390-ccw: Fix indentation in start.S +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] cf8fa053602ce1cfac0b6efa67f491688d4f9348 (thuth/qemu-kvm-cs9) + +start.S is currently indented with a mixture of spaces and tabs, which +is quite ugly. QEMU coding style says indentation should be 4 spaces, +and this is also what we are using in the assembler files in the +tests/tcg/s390x/ folder already, so let's adjust start.S accordingly. 
+ +Reviewed-by: Cédric Le Goater +Message-Id: <20230627074703.99608-2-thuth@redhat.com> +Reviewed-by: Claudio Imbrenda +Reviewed-by: Eric Farman +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +(cherry picked from commit f52420fa4fd9f519dc42c20d2616aba4149adc25) +--- + pc-bios/s390-ccw/start.S | 136 +++++++++++++++++++-------------------- + 1 file changed, 68 insertions(+), 68 deletions(-) + +diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S +index 6072906df4..d29de09cc6 100644 +--- a/pc-bios/s390-ccw/start.S ++++ b/pc-bios/s390-ccw/start.S +@@ -10,37 +10,37 @@ + * directory. + */ + +- .globl _start ++ .globl _start + _start: + +- larl %r15, stack + 0x8000 /* Set up stack */ ++ larl %r15,stack + 0x8000 /* Set up stack */ + +- /* clear bss */ +- larl %r2, __bss_start +- larl %r3, _end +- slgr %r3, %r2 /* get sizeof bss */ +- ltgr %r3,%r3 /* bss empty? */ +- jz done +- aghi %r3,-1 +- srlg %r4,%r3,8 /* how many 256 byte chunks? */ +- ltgr %r4,%r4 +- lgr %r1,%r2 +- jz remainder ++ /* clear bss */ ++ larl %r2,__bss_start ++ larl %r3,_end ++ slgr %r3,%r2 /* get sizeof bss */ ++ ltgr %r3,%r3 /* bss empty? */ ++ jz done ++ aghi %r3,-1 ++ srlg %r4,%r3,8 /* how many 256 byte chunks? */ ++ ltgr %r4,%r4 ++ lgr %r1,%r2 ++ jz remainder + loop: +- xc 0(256,%r1),0(%r1) +- la %r1,256(%r1) +- brctg %r4,loop ++ xc 0(256,%r1),0(%r1) ++ la %r1,256(%r1) ++ brctg %r4,loop + remainder: +- larl %r2,memsetxc +- ex %r3,0(%r2) ++ larl %r2,memsetxc ++ ex %r3,0(%r2) + done: +- /* set up a pgm exception disabled wait psw */ +- larl %r2, disabled_wait_psw +- mvc 0x01d0(16), 0(%r2) +- j main /* And call C */ ++ /* set up a pgm exception disabled wait psw */ ++ larl %r2,disabled_wait_psw ++ mvc 0x01d0(16),0(%r2) ++ j main /* And call C */ + + memsetxc: +- xc 0(1,%r1),0(%r1) ++ xc 0(1,%r1),0(%r1) + + + /* +@@ -48,11 +48,11 @@ memsetxc: + * + * stops the current guest cpu. 
+ */ +- .globl disabled_wait ++ .globl disabled_wait + disabled_wait: +- larl %r1,disabled_wait_psw +- lpswe 0(%r1) +-1: j 1b ++ larl %r1,disabled_wait_psw ++ lpswe 0(%r1) ++1: j 1b + + + /* +@@ -60,61 +60,61 @@ disabled_wait: + * + * eats one sclp interrupt + */ +- .globl consume_sclp_int ++ .globl consume_sclp_int + consume_sclp_int: +- /* enable service interrupts in cr0 */ +- stctg %c0,%c0,0(%r15) +- oi 6(%r15),0x2 +- lctlg %c0,%c0,0(%r15) +- /* prepare external call handler */ +- larl %r1, external_new_code +- stg %r1, 0x1b8 +- larl %r1, external_new_mask +- mvc 0x1b0(8),0(%r1) +- /* load enabled wait PSW */ +- larl %r1, enabled_wait_psw +- lpswe 0(%r1) ++ /* enable service interrupts in cr0 */ ++ stctg %c0,%c0,0(%r15) ++ oi 6(%r15),0x2 ++ lctlg %c0,%c0,0(%r15) ++ /* prepare external call handler */ ++ larl %r1,external_new_code ++ stg %r1,0x1b8 ++ larl %r1,external_new_mask ++ mvc 0x1b0(8),0(%r1) ++ /* load enabled wait PSW */ ++ larl %r1,enabled_wait_psw ++ lpswe 0(%r1) + + /* + * void consume_io_int(void) + * + * eats one I/O interrupt + */ +- .globl consume_io_int ++ .globl consume_io_int + consume_io_int: +- /* enable I/O interrupts in cr6 */ +- stctg %c6,%c6,0(%r15) +- oi 4(%r15), 0xff +- lctlg %c6,%c6,0(%r15) +- /* prepare i/o call handler */ +- larl %r1, io_new_code +- stg %r1, 0x1f8 +- larl %r1, io_new_mask +- mvc 0x1f0(8),0(%r1) +- /* load enabled wait PSW */ +- larl %r1, enabled_wait_psw +- lpswe 0(%r1) ++ /* enable I/O interrupts in cr6 */ ++ stctg %c6,%c6,0(%r15) ++ oi 4(%r15), 0xff ++ lctlg %c6,%c6,0(%r15) ++ /* prepare i/o call handler */ ++ larl %r1,io_new_code ++ stg %r1,0x1f8 ++ larl %r1,io_new_mask ++ mvc 0x1f0(8),0(%r1) ++ /* load enabled wait PSW */ ++ larl %r1,enabled_wait_psw ++ lpswe 0(%r1) + + external_new_code: +- /* disable service interrupts in cr0 */ +- stctg %c0,%c0,0(%r15) +- ni 6(%r15),0xfd +- lctlg %c0,%c0,0(%r15) +- br %r14 ++ /* disable service interrupts in cr0 */ ++ stctg %c0,%c0,0(%r15) ++ ni 6(%r15),0xfd ++ lctlg 
%c0,%c0,0(%r15) ++ br %r14 + + io_new_code: +- /* disable I/O interrupts in cr6 */ +- stctg %c6,%c6,0(%r15) +- ni 4(%r15), 0x00 +- lctlg %c6,%c6,0(%r15) +- br %r14 ++ /* disable I/O interrupts in cr6 */ ++ stctg %c6,%c6,0(%r15) ++ ni 4(%r15),0x00 ++ lctlg %c6,%c6,0(%r15) ++ br %r14 + +- .align 8 ++ .align 8 + disabled_wait_psw: +- .quad 0x0002000180000000,0x0000000000000000 ++ .quad 0x0002000180000000,0x0000000000000000 + enabled_wait_psw: +- .quad 0x0302000180000000,0x0000000000000000 ++ .quad 0x0302000180000000,0x0000000000000000 + external_new_mask: +- .quad 0x0000000180000000 ++ .quad 0x0000000180000000 + io_new_mask: +- .quad 0x0000000180000000 ++ .quad 0x0000000180000000 +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch b/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch new file mode 100644 index 0000000..907fe43 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch @@ -0,0 +1,50 @@ +From b5b243cbbb897b236c08699529e13457e1e49924 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 22 Jun 2023 15:08:22 +0200 +Subject: [PATCH 31/37] pc-bios/s390-ccw/Makefile: Use -z noexecstack to + silence linker warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 04f6f83169f1c5545a0e2772b4babfc6a50bd5bf (thuth/qemu-kvm-cs9) + +Recent versions of ld complain when linking the s390-ccw bios: + + /usr/bin/ld: warning: start.o: missing .note.GNU-stack section implies + executable stack + /usr/bin/ld: NOTE: This behaviour is deprecated and will be removed in + a future version of the linker + +We can silence the warning by telling the linker to mark the stack +as not executable. 
+ +Message-Id: <20230622130822.396793-1-thuth@redhat.com> +Acked-by: Christian Borntraeger +Signed-off-by: Thomas Huth +(cherry picked from commit 442ef32ee5b6059a8f247fb2def9d449578d0a89) +--- + pc-bios/s390-ccw/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile +index 10e8f5cb63..2a590af4a9 100644 +--- a/pc-bios/s390-ccw/Makefile ++++ b/pc-bios/s390-ccw/Makefile +@@ -53,7 +53,7 @@ config-cc.mak: Makefile + $(call cc-option,-march=z900,-march=z10)) 3> config-cc.mak + -include config-cc.mak + +-LDFLAGS += -Wl,-pie -nostdlib ++LDFLAGS += -Wl,-pie -nostdlib -z noexecstack + + build-all: s390-ccw.img s390-netboot.img + +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch b/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch new file mode 100644 index 0000000..0c4ce6f --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch @@ -0,0 +1,59 @@ +From 2c52aebf90f28121a3e46a9305304406023b9747 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 27 Jun 2023 09:47:01 +0200 +Subject: [PATCH 33/37] pc-bios/s390-ccw: Provide space for initial stack frame + in start.S +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] c2f69ce5998861fe20b799bf0113def8cf0cd128 (thuth/qemu-kvm-cs9) + +Providing the space of a stack frame is the duty of the caller, +so we should reserve 160 bytes before jumping into the main function. +Otherwise the main() function might write past the stack array. 
+ +While we're at it, add a proper STACK_SIZE macro for the stack size +instead of using magic numbers (this is also required for the following +patch). + +Reviewed-by: Christian Borntraeger +Reviewed-by: Cédric Le Goater +Message-Id: <20230627074703.99608-3-thuth@redhat.com> +Reviewed-by: Eric Farman +Reviewed-by: Claudio Imbrenda +Reviewed-by: Marc Hartmayer +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +(cherry picked from commit 74fe98ee7fb3344dbd085d1fa32c0dc2fc2c831f) +--- + pc-bios/s390-ccw/start.S | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S +index d29de09cc6..abd6fe6639 100644 +--- a/pc-bios/s390-ccw/start.S ++++ b/pc-bios/s390-ccw/start.S +@@ -10,10 +10,13 @@ + * directory. + */ + ++#define STACK_SIZE 0x8000 ++#define STACK_FRAME_SIZE 160 ++ + .globl _start + _start: + +- larl %r15,stack + 0x8000 /* Set up stack */ ++ larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ + + /* clear bss */ + larl %r2,__bss_start +-- +2.39.3 + diff --git a/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch b/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch new file mode 100644 index 0000000..1ec1c82 --- /dev/null +++ b/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch @@ -0,0 +1,87 @@ +From 2732b6c5ef249d3ec9affca66768cc2fc476ff7c Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Thu, 6 Jul 2023 01:55:47 -0300 +Subject: [PATCH 11/12] pcie: Add hotplug detect state register to cmask +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 188: pcie: Add hotplug detect state register to cmask +RH-Bugzilla: 2215819 +RH-Acked-by: Peter Xu +RH-Acked-by: quintela1 +RH-Commit: [1/1] a125fa337711bddbc957c399044393e82272b143 (LeoBras/centos-qemu-kvm) + +When trying to migrate a machine type pc-q35-6.0 or lower, with this +cmdline options, 
+ +-device driver=pcie-root-port,port=18,chassis=19,id=pcie-root-port18,bus=pcie.0,addr=0x12 \ +-device driver=nec-usb-xhci,p2=4,p3=4,id=nex-usb-xhci0,bus=pcie-root-port18,addr=0x12.0x1 + +the following bug happens after all ram pages were sent: + +qemu-kvm: get_pci_config_device: Bad config data: i=0x6e read: 0 device: 40 cmask: ff wmask: 0 w1cmask:19 +qemu-kvm: Failed to load PCIDevice:config +qemu-kvm: Failed to load pcie-root-port:parent_obj.parent_obj.parent_obj +qemu-kvm: error while loading state for instance 0x0 of device '0000:00:12.0/pcie-root-port' +qemu-kvm: load of migration failed: Invalid argument + +This happens on pc-q35-6.0 or lower because of: +{ "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" } + +In this scenario, hotplug_handler_plug() calls pcie_cap_slot_plug_cb(), +which sets dev->config byte 0x6e with bit PCI_EXP_SLTSTA_PDS to signal PCI +hotplug for the guest. After a while the guest will deal with this hotplug +and qemu will clear the above bit. + +Then, during migration, get_pci_config_device() will compare the +configs of both the freshly created device and the one that is being +received via migration, which will differ due to the PCI_EXP_SLTSTA_PDS bit +and cause the bug to reproduce. + +To avoid this fake incompatibility, there are three fields in PCIDevice that +can help: + +- wmask: Used to implement R/W bytes, and +- w1cmask: Used to implement RW1C(Write 1 to Clear) bytes +- cmask: Used to enable config checks on load. + +According to PCI Express® Base Specification Revision 5.0 Version 1.0, +table 7-27 (Slot Status Register) bit 6, the "Presence Detect State" is +listed as RO (read-only), so it only makes sense to make use of the cmask +field. + +So, clear PCI_EXP_SLTSTA_PDS bit on cmask, so the fake incompatibility on +get_pci_config_device() does not abort the migration.
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215819 +Signed-off-by: Leonardo Bras +Message-Id: <20230706045546.593605-3-leobras@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Juan Quintela +(cherry picked from commit 625b370c45f4acd155ee625d61c0057d770a5b5e) +Signed-off-by: Leonardo Bras +--- + hw/pci/pcie.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index b8c24cf45f..8bc4a4ee57 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -659,6 +659,10 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) + pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA, + PCI_EXP_HP_EV_SUPPORTED); + ++ /* Avoid migration abortion when this device hot-removed by guest */ ++ pci_word_test_and_clear_mask(dev->cmask + pos + PCI_EXP_SLTSTA, ++ PCI_EXP_SLTSTA_PDS); ++ + dev->exp.hpev_notified = false; + + qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))), +-- +2.39.3 + diff --git a/SOURCES/kvm-physmem-add-missing-memory-barrier.patch b/SOURCES/kvm-physmem-add-missing-memory-barrier.patch deleted file mode 100644 index 3eafa78..0000000 --- a/SOURCES/kvm-physmem-add-missing-memory-barrier.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 0dd4be411e35f00d006d89a15d9161f5d8783c1d Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 10/12] physmem: add missing memory barrier - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [7/9] ee4875cb8c564f0510e48b00a5d95c0e6ea6301b (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 33828ca11da08436e1b32f3e79dabce3061a0427 -Author: Paolo Bonzini -Date: Fri Mar 3 14:36:32 2023 +0100 - - physmem: add missing memory barrier - - 
Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - softmmu/physmem.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/softmmu/physmem.c b/softmmu/physmem.c -index 1b606a3002..772c9896cd 100644 ---- a/softmmu/physmem.c -+++ b/softmmu/physmem.c -@@ -3117,6 +3117,8 @@ void cpu_register_map_client(QEMUBH *bh) - qemu_mutex_lock(&map_client_list_lock); - client->bh = bh; - QLIST_INSERT_HEAD(&map_client_list, client, link); -+ /* Write map_client_list before reading in_use. */ -+ smp_mb(); - if (!qatomic_read(&bounce.in_use)) { - cpu_notify_map_clients_locked(); - } -@@ -3309,6 +3311,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, - qemu_vfree(bounce.buffer); - bounce.buffer = NULL; - memory_region_unref(bounce.mr); -+ /* Clear in_use before reading map_client_list. */ - qatomic_mb_set(&bounce.in_use, false); - cpu_notify_map_clients(); - } --- -2.39.1 - diff --git a/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch b/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch new file mode 100644 index 0000000..0421e33 --- /dev/null +++ b/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch @@ -0,0 +1,42 @@ +From ab9b8620c62540f3267d005c198920671ef9abc3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 3 Mar 2023 11:15:28 +0100 +Subject: [PATCH 06/56] postcopy-ram: do not use qatomic_mb_read +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [5/50] 534c0e13362dfc994fa90c79bfb5ed6ee8c27dfc (peterx/qemu-kvm) + +It does not even pair with a qatomic_mb_set(), so it is clearer to use +load-acquire in this case; they are synonyms. 
+ +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4592eaf38755a28300d113cd128f65b5b38495f2) +Signed-off-by: Peter Xu +--- + migration/postcopy-ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index bbb8af61ae..d7b48dd920 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -1526,7 +1526,7 @@ static PostcopyState incoming_postcopy_state; + + PostcopyState postcopy_state_get(void) + { +- return qatomic_mb_read(&incoming_postcopy_state); ++ return qatomic_load_acquire(&incoming_postcopy_state); + } + + /* Set the state and return the old state */ +-- +2.39.1 + diff --git a/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch b/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch new file mode 100644 index 0000000..abaadf8 --- /dev/null +++ b/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch @@ -0,0 +1,79 @@ +From 99f27e14856c528f442b628e8f4a7881e6e63179 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 30 May 2023 09:19:41 +0200 +Subject: [PATCH 4/5] qapi: add '@fdset' feature for + BlockdevOptionsVirtioBlkVhostVdpa + +RH-Author: Stefano Garzarella +RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver +RH-Bugzilla: 2180076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/2] abee2a542e41f9eaa17dd204b74778e232d1eb60 (sgarzarella/qemu-kvm-c-9-s) + +The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports the fd +passing through the new 'fd' property. + +Since now we are using qemu_open() on '@path' if the virtio-blk driver +supports the fd passing, let's announce it. +In this way, the management layer can pass the file descriptor of an +already opened vhost-vdpa character device. This is useful especially +when the device can only be accessed with certain privileges. 
+ +Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver +in libblkio supports it. + +Suggested-by: Markus Armbruster +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Stefano Garzarella +Message-id: 20230530071941.8954-3-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 98b126f5e3228a346c774e569e26689943b401dd) +- changed doc indentation since QAPI parser failed downstream because + we don't have commit 08349786c84306863a3b659c8a9b28bb74c405c6 + downstream. It relaxed the indentation rules. +Signed-off-by: Stefano Garzarella +--- + meson.build | 4 ++++ + qapi/block-core.json | 6 ++++++ + 2 files changed, 10 insertions(+) + +diff --git a/meson.build b/meson.build +index d964e741e7..a18cc64531 100644 +--- a/meson.build ++++ b/meson.build +@@ -1843,6 +1843,10 @@ config_host_data.set('CONFIG_LZO', lzo.found()) + config_host_data.set('CONFIG_MPATH', mpathpersist.found()) + config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) + config_host_data.set('CONFIG_BLKIO', blkio.found()) ++if blkio.found() ++ config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', ++ blkio.version().version_compare('>=1.3.0')) ++endif + config_host_data.set('CONFIG_CURL', curl.found()) + config_host_data.set('CONFIG_CURSES', curses.found()) + config_host_data.set('CONFIG_GBM', gbm.found()) +diff --git a/qapi/block-core.json b/qapi/block-core.json +index c05ad0c07e..81b48a8d3b 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -3841,10 +3841,16 @@ + # + # @path: path to the vhost-vdpa character device.
+ # ++# Features: ++# @fdset: Member @path supports the special "/dev/fdset/N" path ++# (since 8.1) ++# + # Since: 7.2 + ## + { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', + 'data': { 'path': 'str' }, ++ 'features': [ { 'name' :'fdset', ++ 'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ], + 'if': 'CONFIG_BLKIO' } + + ## +-- +2.39.3 + diff --git a/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch b/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch new file mode 100644 index 0000000..a95895b --- /dev/null +++ b/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch @@ -0,0 +1,50 @@ +From cbf9c74ef46d71c015b9de53f4514941dca8a035 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Thu, 3 Aug 2023 14:19:37 -0400 +Subject: [PATCH 10/14] qapi, i386/sev: Change the reduced-phys-bits value from + 5 to 1 + +RH-Author: Bandan Das +RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter +RH-Bugzilla: 2214839 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 4243578db33f89461e60b745eb96fee402218c9f (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 + +commit 798a818f50a9bfc01e8b5943090de458863b897b +Author: Tom Lendacky +Date: Fri Sep 30 10:14:27 2022 -0500 + + qapi, i386/sev: Change the reduced-phys-bits value from 5 to 1 + + A guest only ever experiences, at most, 1 bit of reduced physical + addressing. Change the query-sev-capabilities json comment to use 1. + + Fixes: 31dd67f684 ("sev/i386: qmp: add query-sev-capabilities command") + Signed-off-by: Tom Lendacky + Reviewed-by: Dr. 
David Alan Gilbert + Message-Id: + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + qapi/misc-target.json | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index de91054523..bf04042f45 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -172,7 +172,7 @@ + # -> { "execute": "query-sev-capabilities" } + # <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE", + # "cpu0-id": "2lvmGwo+...61iEinw==", +-# "cbitpos": 47, "reduced-phys-bits": 5}} ++# "cbitpos": 47, "reduced-phys-bits": 1}} + # + ## + { 'command': 'query-sev-capabilities', 'returns': 'SevCapability', +-- +2.39.3 + diff --git a/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch b/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch deleted file mode 100644 index acc8c7d..0000000 --- a/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 1fdc864f9ac927f3ea407f35f6771a4b2e8f509f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 04/12] qatomic: add smp_mb__before/after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [1/9] e8d0b64670bff778d275b1fb477dcee0c109251a (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176 -Author: Paolo Bonzini -Date: Thu Mar 2 11:10:56 2023 +0100 - - qatomic: add smp_mb__before/after_rmw() - - On ARM, seqcst loads and stores (which QEMU does not use) are compiled - respectively as LDAR and STLR instructions. Even though LDAR is - also used for load-acquire operations, it also waits for all STLRs to - leave the store buffer. 
Thus, LDAR and STLR alone are load-acquire - and store-release operations, but LDAR also provides store-against-load - ordering as long as the previous store is a STLR. - - Compare this to ARMv7, where store-release is DMB+STR and load-acquire - is LDR+DMB, but an additional DMB is needed between store-seqcst and - load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides - load-acquire and store-release semantics and the two can be reordered. - - Likewise, on ARM sequentially consistent read-modify-write operations only - need to use LDAXR and STLXR respectively for the load and the store, while - on x86 they need to use the stronger LOCK prefix. - - In a strange twist of events, however, the _stronger_ semantics - of the ARM instructions can end up causing bugs on ARM, not on x86. - The problems occur when seqcst atomics are mixed with relaxed atomics. - - QEMU's atomics try to bridge the Linux API (that most of the developers - are familiar with) and the C11 API, and the two have a substantial - difference: - - - in Linux, strongly-ordered atomics such as atomic_add_return() affect - the global ordering of _all_ memory operations, including for example - READ_ONCE()/WRITE_ONCE() - - - in C11, sequentially consistent atomics (except for seq-cst fences) - only affect the ordering of sequentially consistent operations. - In particular, since relaxed loads are done with LDR on ARM, they are - not ordered against seqcst stores (which are done with STLR). - - QEMU implements high-level synchronization primitives with the idea that - the primitives contain the necessary memory barriers, and the callers can - use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses. - This is very much incompatible with the C11 view that seqcst accesses - are only ordered against other seqcst accesses, and requires using seqcst - fences as in the following example: - - qatomic_set(&y, 1); qatomic_set(&x, 1); - smp_mb(); smp_mb(); - ... qatomic_read(&x) ... 
... qatomic_read(&y) ... - - When a qatomic_*() read-modify write operation is used instead of one - or both stores, developers that are more familiar with the Linux API may - be tempted to omit the smp_mb(), which will work on x86 but not on ARM. - - This nasty difference between Linux and C11 read-modify-write operations - has already caused issues in util/async.c and more are being found. - Provide something similar to Linux smp_mb__before/after_atomic(); this - has the double function of documenting clearly why there is a memory - barrier, and avoiding a double barrier on x86 and s390x systems. - - The new macro can already be put to use in qatomic_mb_set(). - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - docs/devel/atomics.rst | 26 +++++++++++++++++++++----- - include/qemu/atomic.h | 17 ++++++++++++++++- - 2 files changed, 37 insertions(+), 6 deletions(-) - -diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst -index 52baa0736d..10fbfc58bb 100644 ---- a/docs/devel/atomics.rst -+++ b/docs/devel/atomics.rst -@@ -25,7 +25,8 @@ provides macros that fall in three camps: - - - weak atomic access and manual memory barriers: ``qatomic_read()``, - ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, -- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``; -+ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``, -+ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``; - - - sequentially consistent atomic access: everything else. - -@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU: - sequential consistency. - - - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in -- the total ordering enforced by sequentially-consistent operations. -+ the ordering enforced by read-modify-write operations. - This is because QEMU uses the C11 memory model. 
The following example - is correct in Linux but not in QEMU: - -@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU: - because the read of ``y`` can be moved (by either the processor or the - compiler) before the write of ``x``. - -- Fixing this requires an ``smp_mb()`` memory barrier between the write -- of ``x`` and the read of ``y``. In the common case where only one thread -- writes ``x``, it is also possible to write it like this: -+ Fixing this requires a full memory barrier between the write of ``x`` and -+ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and -+ ``smp_mb__after_rmw()``; they act both as an optimization, -+ avoiding the memory barrier on processors where it is unnecessary, -+ and as a clarification of this corner case of the C11 memory model: -+ -+ +--------------------------------+ -+ | QEMU (correct) | -+ +================================+ -+ | :: | -+ | | -+ | a = qatomic_fetch_add(&x, 2);| -+ | smp_mb__after_rmw(); | -+ | b = qatomic_read(&y); | -+ +--------------------------------+ -+ -+ In the common case where only one thread writes ``x``, it is also possible -+ to write it like this: - - +--------------------------------+ - | QEMU (correct) | -diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h -index 874134fd19..f85834ee8b 100644 ---- a/include/qemu/atomic.h -+++ b/include/qemu/atomic.h -@@ -245,6 +245,20 @@ - #define smp_wmb() smp_mb_release() - #define smp_rmb() smp_mb_acquire() - -+/* -+ * SEQ_CST is weaker than the older __sync_* builtins and Linux -+ * kernel read-modify-write atomics. Provide a macro to obtain -+ * the same semantics. 
-+ */ -+#if !defined(QEMU_SANITIZE_THREAD) && \ -+ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) -+# define smp_mb__before_rmw() signal_barrier() -+# define smp_mb__after_rmw() signal_barrier() -+#else -+# define smp_mb__before_rmw() smp_mb() -+# define smp_mb__after_rmw() smp_mb() -+#endif -+ - /* qatomic_mb_read/set semantics map Java volatile variables. They are - * less expensive on some platforms (notably POWER) than fully - * sequentially consistent operations. -@@ -259,7 +273,8 @@ - #if !defined(QEMU_SANITIZE_THREAD) && \ - (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) - /* This is more efficient than a store plus a fence. */ --# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) -+# define qatomic_mb_set(ptr, i) \ -+ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); }) - #else - # define qatomic_mb_set(ptr, i) \ - ({ qatomic_store_release(ptr, i); smp_mb(); }) --- -2.39.1 - diff --git a/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch b/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch deleted file mode 100644 index 7f39f4a..0000000 --- a/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 46ead2c391924b68741d6da28f28f909b80f5914 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:51 +0100 -Subject: [PATCH 01/20] qcow2: Fix theoretical corruption in store_bitmap() - error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [1/4] a6a497947179431567d330d0501247a3749fb9fd (kmwolf/centos-qemu-kvm) - -In order to write the bitmap table to the image file, it is converted to -big endian. 
If the write fails, it is passed to clear_bitmap_table() to -free all of the clusters it had allocated before. However, if we don't -convert it back to native endianness first, we'll free things at a wrong -offset. - -In practical terms, the offsets will be so high that we won't actually -free any allocated clusters, but just run into an error, but in theory -this can cause image corruption. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-2-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382) -Signed-off-by: Kevin Wolf ---- - block/qcow2-bitmap.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c -index bcad567c0c..3dff99ba06 100644 ---- a/block/qcow2-bitmap.c -+++ b/block/qcow2-bitmap.c -@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs) - return bdrv_flush(bs->file->bs); - } - --static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) -+static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size) - { - size_t i; - -@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) - goto fail; - } - -- bitmap_table_to_be(tb, tb_size); -+ bitmap_table_bswap_be(tb, tb_size); - ret = bdrv_pwrite(bs->file, tb_offset, tb_size * sizeof(tb[0]), tb, 0); - if (ret < 0) { -+ bitmap_table_bswap_be(tb, tb_size); - error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", - bm_name); - goto fail; --- -2.31.1 - diff --git a/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch b/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch deleted file mode 100644 index d2dacbc..0000000 --- a/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch +++ /dev/null @@ -1,84 +0,0 @@ -From f628a08d20b9ab6be24c2ab18b38a934a314c78b Mon Sep 
17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:56 +0100 -Subject: [PATCH 14/31] qed: Don't yield in bdrv_qed_co_drain_begin() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [2/16] f18e9aebb7e04a62e309b656bac8f2ab83df657f (sgarzarella/qemu-kvm-c-9-s) - -We want to change .bdrv_co_drained_begin() back to be a non-coroutine -callback, so in preparation, avoid yielding in its implementation. - -Because we increase bs->in_flight and bdrv_drained_begin() polls, the -behaviour is unchanged. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 6d47eb0c8bf2d50682c7dccae74d24104076fe23) -Signed-off-by: Stefano Garzarella ---- - block/qed.c | 20 +++++++++++++++++--- - 1 file changed, 17 insertions(+), 3 deletions(-) - -diff --git a/block/qed.c b/block/qed.c -index 2f36ad342c..013f826c44 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s) - qemu_co_mutex_unlock(&s->table_lock); - } - --static void coroutine_fn qed_need_check_timer_entry(void *opaque) -+static void coroutine_fn qed_need_check_timer(BDRVQEDState *s) - { -- BDRVQEDState *s = opaque; - int ret; - - trace_qed_need_check_timer_cb(s); -@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque) - (void) ret; - } - -+static void coroutine_fn qed_need_check_timer_entry(void *opaque) -+{ -+ BDRVQEDState *s = opaque; -+ -+ qed_need_check_timer(opaque); -+ bdrv_dec_in_flight(s->bs); -+} -+ - static void qed_need_check_timer_cb(void *opaque) - { -+ BDRVQEDState *s = opaque; - Coroutine *co = 
qemu_coroutine_create(qed_need_check_timer_entry, opaque); -+ -+ bdrv_inc_in_flight(s->bs); - qemu_coroutine_enter(co); - } - -@@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) - * header is flushed. - */ - if (s->need_check_timer && timer_pending(s->need_check_timer)) { -+ Coroutine *co; -+ - qed_cancel_need_check_timer(s); -- qed_need_check_timer_entry(s); -+ co = qemu_coroutine_create(qed_need_check_timer_entry, s); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch b/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch deleted file mode 100644 index 86e94db..0000000 --- a/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 7a9907c65e3e2bbb0c119acdbbeb4381e7f1d902 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 09/12] qemu-coroutine-lock: add smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [6/9] 4b1723b1ad670ec4c85240390b4fc15ff361154f (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit e3a3b6ec8169eab2feb241b4982585001512cd55 -Author: Paolo Bonzini -Date: Fri Mar 3 10:52:59 2023 +0100 - - qemu-coroutine-lock: add smp_mb__after_rmw() - - mutex->from_push and mutex->handoff in qemu-coroutine-lock implement - the familiar pattern: - - write a write b - smp_mb() smp_mb() - read b read a - - The memory barrier is required by the C memory model even after a - SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC. - Add it and avoid the unclear qatomic_mb_read() operation. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-coroutine-lock.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c -index 45c6b57374..c5897bd963 100644 ---- a/util/qemu-coroutine-lock.c -+++ b/util/qemu-coroutine-lock.c -@@ -202,10 +202,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, - trace_qemu_co_mutex_lock_entry(mutex, self); - push_waiter(mutex, &w); - -+ /* -+ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set -+ * in qemu_co_mutex_unlock. -+ */ -+ smp_mb__after_rmw(); -+ - /* This is the "Responsibility Hand-Off" protocol; a lock() picks from - * a concurrent unlock() the responsibility of waking somebody up. - */ -- old_handoff = qatomic_mb_read(&mutex->handoff); -+ old_handoff = qatomic_read(&mutex->handoff); - if (old_handoff && - has_waiters(mutex) && - qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) { -@@ -304,6 +310,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) - } - - our_handoff = mutex->sequence; -+ /* Set handoff before checking for waiters. 
*/ - qatomic_mb_set(&mutex->handoff, our_handoff); - if (!has_waiters(mutex)) { - /* The concurrent lock has not added itself yet, so it --- -2.39.1 - diff --git a/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch b/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch deleted file mode 100644 index eff4d2e..0000000 --- a/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch +++ /dev/null @@ -1,197 +0,0 @@ -From b1970c733dc46b2a8f648997a7e1c5d12900ff54 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:04 +0200 -Subject: [PATCH 17/20] qemu-img: Change info key names for protocol nodes - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [12/12] 67c260aaa05466410503fecee6210bf9d47e8c7c (hreitz/qemu-kvm-c-9-s) - -Currently, when querying a qcow2 image, qemu-img info reports something -like this: - -image: test.qcow2 -file format: qcow2 -virtual size: 64 MiB (67108864 bytes) -disk size: 196 KiB -cluster_size: 65536 -Format specific information: - compat: 1.1 - compression type: zlib - lazy refcounts: false - refcount bits: 16 - corrupt: false - extended l2: false -Child node '/file': - image: test.qcow2 - file format: file - virtual size: 192 KiB (197120 bytes) - disk size: 196 KiB - Format specific information: - extent size hint: 1048576 - -Notably, the way the keys are named is specific for image files: The -filename is shown under "image", the BDS driver under "file format", and -the BDS length under "virtual size". This does not make much sense for -nodes that are not actually supposed to be guest images, like the /file -child node shown above. 
- -Give bdrv_node_info_dump() a @protocol parameter that gives a hint that -the respective node is probably just used for data storage and does not -necessarily present the data for a VM guest disk. This renames the keys -so that with this patch, the output becomes: - -image: test.qcow2 -[...] -Child node '/file': - filename: test.qcow2 - protocol type: file - file length: 192 KiB (197120 bytes) - disk size: 196 KiB - Format specific information: - extent size hint: 1048576 - -(Perhaps we should also rename "Format specific information", but I -could not come up with anything better that will not become problematic -if we guess wrong with the protocol "heuristic".) - -This change affects iotest 302, which has protocol node information in -its reference output. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-13-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit d570177b50c389f379f93183155a27d44856ab46) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 39 ++++++++++++++++++++++++++++------ - include/block/qapi.h | 2 +- - qemu-img.c | 3 ++- - tests/qemu-iotests/302.out | 6 +++--- - 5 files changed, 39 insertions(+), 13 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index 72824d4e2e..4d83339a5d 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0, false); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index 3e35603f0c..56f398c500 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -934,24 +934,49 @@ void 
bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - visit_free(v); - } - --void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) -+/** -+ * Print the given @info object in human-readable form. Every field is indented -+ * using the given @indentation (four spaces per indentation level). -+ * -+ * When using this to print a whole block graph, @protocol can be set to true to -+ * signify that the given information is associated with a protocol node, i.e. -+ * just data storage for an image, such that the data it presents is not really -+ * a full VM disk. If so, several fields change name: For example, "virtual -+ * size" is printed as "file length". -+ * (Consider a qcow2 image, which is represented by a qcow2 node and a file -+ * node. Printing a "virtual size" for the file node does not make sense, -+ * because without the qcow2 node, it is not really a guest disk, so it does not -+ * have a "virtual size". Therefore, we call it "file length" instead.) -+ * -+ * @protocol is ignored when @indentation is 0, because we take that to mean -+ * that the associated node is the root node in the queried block graph, and -+ * thus is always to be interpreted as a standalone guest disk. 
-+ */ -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol) - { - char *size_buf, *dsize_buf; - g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); - -+ if (indentation == 0) { -+ /* Top level, consider this a normal image */ -+ protocol = false; -+ } -+ - if (!info->has_actual_size) { - dsize_buf = g_strdup("unavailable"); - } else { - dsize_buf = size_to_str(info->actual_size); - } - size_buf = size_to_str(info->virtual_size); -- qemu_printf("%simage: %s\n" -- "%sfile format: %s\n" -- "%svirtual size: %s (%" PRId64 " bytes)\n" -+ qemu_printf("%s%s: %s\n" -+ "%s%s: %s\n" -+ "%s%s: %s (%" PRId64 " bytes)\n" - "%sdisk size: %s\n", -- ind_s, info->filename, -- ind_s, info->format, -- ind_s, size_buf, info->virtual_size, -+ ind_s, protocol ? "filename" : "image", info->filename, -+ ind_s, protocol ? "protocol type" : "file format", -+ info->format, -+ ind_s, protocol ? "file length" : "virtual size", -+ size_buf, info->virtual_size, - ind_s, dsize_buf); - g_free(size_buf); - g_free(dsize_buf); -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 38855f2ae9..26113da21a 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -51,5 +51,5 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - const char *prefix, - int indentation); --void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol); - #endif -diff --git a/qemu-img.c b/qemu-img.c -index e281011245..2943625c67 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2853,7 +2853,8 @@ static void dump_human_image_info(BlockGraphInfo *info, int indentation, - { - BlockChildInfoList *children_list; - -- bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); -+ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation, -+ info->children == NULL); - - for (children_list = info->children; 
children_list; - children_list = children_list->next) -diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out -index edfa1c4f05..7b5014cdd8 100644 ---- a/tests/qemu-iotests/302.out -+++ b/tests/qemu-iotests/302.out -@@ -5,9 +5,9 @@ file format: raw - virtual size: 448 KiB (458752 bytes) - disk size: unavailable - Child node '/file': -- image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -- file format: nbd -- virtual size: 448 KiB (458752 bytes) -+ filename: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -+ protocol type: nbd -+ file length: 448 KiB (458752 bytes) - disk size: unavailable - - === Converted image info === --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch b/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch deleted file mode 100644 index 536df69..0000000 --- a/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch +++ /dev/null @@ -1,261 +0,0 @@ -From ea73e9de42b446ce1049805c23f7706e4f87ed1f Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:03 +0200 -Subject: [PATCH 16/20] qemu-img: Let info print block graph - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [11/12] 2c1b8a03c918484449e876acf4c6663766848ad8 (hreitz/qemu-kvm-c-9-s) - -For every node in the backing chain, collect its BlockGraphInfo struct -using bdrv_query_block_graph_info(). Print all nodes' information, -indenting child nodes and labelling them with a path constructed from -the child names leading to the node from the root (e.g. /file/file). - -Note that we open each image with BDRV_O_NO_BACKING, so its backing -child is omitted from this graph, and thus presented in the previous -manner: By simply concatenating all images' information, separated with -blank lines. 
- -This affects two iotests: -- 065: Here we try to get the format node's format specific information. - The pre-patch code does so by taking all lines from "Format specific - information:" until an empty line. This format specific information - is no longer followed by an empty line, though, but by child node - information, so limit the range by "Child node '/file':". -- 302: Calls qemu_img() for qemu-img info directly, which does not - filter the output, so the child node information ends up in the - output. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-12-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit c04d0ab026201d21873a63f768cb69c4554dfec1) -Signed-off-by: Hanna Czenczek ---- - qapi/block-core.json | 4 +-- - qemu-img.c | 69 ++++++++++++++++++++++++++------------ - tests/qemu-iotests/065 | 2 +- - tests/qemu-iotests/302.out | 5 +++ - 4 files changed, 56 insertions(+), 24 deletions(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index d703e0fb16..7f331eb8ea 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -5831,9 +5831,9 @@ - ## - # @DummyBlockCoreForceArrays: - # --# Not used by QMP; hack to let us use BlockNodeInfoList internally -+# Not used by QMP; hack to let us use BlockGraphInfoList internally - # - # Since: 8.0 - ## - { 'struct': 'DummyBlockCoreForceArrays', -- 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } -+ 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } -diff --git a/qemu-img.c b/qemu-img.c -index 30b4ea58bb..e281011245 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) - g_free(sn_tab); - } - --static void dump_json_block_node_info_list(BlockNodeInfoList *list) -+static void dump_json_block_graph_info_list(BlockGraphInfoList *list) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_BlockNodeInfoList(v, NULL, 
&list, &error_abort); -+ visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2832,13 +2832,13 @@ static void dump_json_block_node_info_list(BlockNodeInfoList *list) - g_string_free(str, true); - } - --static void dump_json_block_node_info(BlockNodeInfo *info) -+static void dump_json_block_graph_info(BlockGraphInfo *info) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); -+ visit_type_BlockGraphInfo(v, NULL, &info, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2848,9 +2848,29 @@ static void dump_json_block_node_info(BlockNodeInfo *info) - g_string_free(str, true); - } - --static void dump_human_image_info_list(BlockNodeInfoList *list) -+static void dump_human_image_info(BlockGraphInfo *info, int indentation, -+ const char *path) - { -- BlockNodeInfoList *elem; -+ BlockChildInfoList *children_list; -+ -+ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); -+ -+ for (children_list = info->children; children_list; -+ children_list = children_list->next) -+ { -+ BlockChildInfo *child = children_list->value; -+ g_autofree char *child_path = NULL; -+ -+ printf("%*sChild node '%s%s':\n", -+ indentation * 4, "", path, child->name); -+ child_path = g_strdup_printf("%s%s/", path, child->name); -+ dump_human_image_info(child->info, indentation + 1, child_path); -+ } -+} -+ -+static void dump_human_image_info_list(BlockGraphInfoList *list) -+{ -+ BlockGraphInfoList *elem; - bool delim = false; - - for (elem = list; elem; elem = elem->next) { -@@ -2859,7 +2879,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list) - } - delim = true; - -- bdrv_node_info_dump(elem->value, 0); -+ dump_human_image_info(elem->value, 0, "/"); - } - } - -@@ -2869,7 +2889,7 @@ static gboolean 
str_equal_func(gconstpointer a, gconstpointer b) - } - - /** -- * Open an image file chain and return an BlockNodeInfoList -+ * Open an image file chain and return an BlockGraphInfoList - * - * @filename: topmost image filename - * @fmt: topmost image format (may be NULL to autodetect) -@@ -2880,13 +2900,13 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) - * opening an image file. If there was an error a message will have been - * printed to stderr. - */ --static BlockNodeInfoList *collect_image_info_list(bool image_opts, -- const char *filename, -- const char *fmt, -- bool chain, bool force_share) -+static BlockGraphInfoList *collect_image_info_list(bool image_opts, -+ const char *filename, -+ const char *fmt, -+ bool chain, bool force_share) - { -- BlockNodeInfoList *head = NULL; -- BlockNodeInfoList **tail = &head; -+ BlockGraphInfoList *head = NULL; -+ BlockGraphInfoList **tail = &head; - GHashTable *filenames; - Error *err = NULL; - -@@ -2895,7 +2915,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - while (filename) { - BlockBackend *blk; - BlockDriverState *bs; -- BlockNodeInfo *info; -+ BlockGraphInfo *info; - - if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { - error_report("Backing file '%s' creates an infinite loop.", -@@ -2912,7 +2932,14 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - } - bs = blk_bs(blk); - -- bdrv_query_block_node_info(bs, &info, &err); -+ /* -+ * Note that the returned BlockGraphInfo object will not have -+ * information about this image's backing node, because we have opened -+ * it with BDRV_O_NO_BACKING. Printing this object will therefore not -+ * duplicate the backing chain information that we obtain by walking -+ * the chain manually here. 
-+ */ -+ bdrv_query_block_graph_info(bs, &info, &err); - if (err) { - error_report_err(err); - blk_unref(blk); -@@ -2945,7 +2972,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - return head; - - err: -- qapi_free_BlockNodeInfoList(head); -+ qapi_free_BlockGraphInfoList(head); - g_hash_table_destroy(filenames); - return NULL; - } -@@ -2956,7 +2983,7 @@ static int img_info(int argc, char **argv) - OutputFormat output_format = OFORMAT_HUMAN; - bool chain = false; - const char *filename, *fmt, *output; -- BlockNodeInfoList *list; -+ BlockGraphInfoList *list; - bool image_opts = false; - bool force_share = false; - -@@ -3035,14 +3062,14 @@ static int img_info(int argc, char **argv) - break; - case OFORMAT_JSON: - if (chain) { -- dump_json_block_node_info_list(list); -+ dump_json_block_graph_info_list(list); - } else { -- dump_json_block_node_info(list->value); -+ dump_json_block_graph_info(list->value); - } - break; - } - -- qapi_free_BlockNodeInfoList(list); -+ qapi_free_BlockGraphInfoList(list); - return 0; - } - -diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 -index b724c89c7c..b76701c71e 100755 ---- a/tests/qemu-iotests/065 -+++ b/tests/qemu-iotests/065 -@@ -56,7 +56,7 @@ class TestQemuImgInfo(TestImageInfoSpecific): - def test_human(self): - data = qemu_img('info', '--output=human', test_img).stdout.split('\n') - data = data[(data.index('Format specific information:') + 1) -- :data.index('')] -+ :data.index("Child node '/file':")] - for field in data: - self.assertTrue(re.match('^ {4}[^ ]', field) is not None) - data = [line.strip() for line in data] -diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out -index 3e7c281b91..edfa1c4f05 100644 ---- a/tests/qemu-iotests/302.out -+++ b/tests/qemu-iotests/302.out -@@ -4,6 +4,11 @@ image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock - file format: raw - virtual size: 448 KiB (458752 bytes) - disk size: unavailable -+Child node '/file': -+ image: 
nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -+ file format: nbd -+ virtual size: 448 KiB (458752 bytes) -+ disk size: unavailable - - === Converted image info === - image: TEST_IMG --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch b/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch deleted file mode 100644 index 7bfb7e6..0000000 --- a/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch +++ /dev/null @@ -1,241 +0,0 @@ -From dca4cbe680baff837ca8ac8bd39b77b46af3f64b Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:57 +0200 -Subject: [PATCH 10/20] qemu-img: Use BlockNodeInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [5/12] b599af3ec05951a0ba11d9eae2ee19148d6bf624 (hreitz/qemu-kvm-c-9-s) - -qemu-img info never uses ImageInfo's backing-image field, because it -opens the backing chain one by one with BDRV_O_NO_BACKING, and prints -all backing chain nodes' information consecutively. Use BlockNodeInfo -to make it clear that we only print information about a single node, and -that we are not using the backing-image field. - -Notably, bdrv_image_info_dump() does not evaluate the backing-image -field, so we can easily make it take a BlockNodeInfo pointer (and -consequentially rename it to bdrv_node_info_dump()). It makes more -sense this way, because again, the interface now makes it syntactically -clear that backing-image is ignored by this function. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-6-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit b1f4cd1589a16fec02f264a09bd3560e4ccce3c2) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 2 +- - include/block/qapi.h | 2 +- - qapi/block-core.json | 4 +-- - qemu-img.c | 48 +++++++++++++++++----------------- - 5 files changed, 29 insertions(+), 29 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index b6135e9bfe..aa37faa601 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_image_info_dump(image_info); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index e5022b4481..ad88bf9b38 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -865,7 +865,7 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - visit_free(v); - } - --void bdrv_image_info_dump(ImageInfo *info) -+void bdrv_node_info_dump(BlockNodeInfo *info) - { - char *size_buf, *dsize_buf; - if (!info->has_actual_size) { -diff --git a/include/block/qapi.h b/include/block/qapi.h -index c7de4e3fa9..22198dcd0c 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -45,5 +45,5 @@ void bdrv_query_image_info(BlockDriverState *bs, - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - const char *prefix); --void bdrv_image_info_dump(ImageInfo *info); -+void bdrv_node_info_dump(BlockNodeInfo *info); - #endif -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 7720da0498..4cf2deeb6c 100644 ---- a/qapi/block-core.json -+++ 
b/qapi/block-core.json -@@ -5796,9 +5796,9 @@ - ## - # @DummyBlockCoreForceArrays: - # --# Not used by QMP; hack to let us use ImageInfoList internally -+# Not used by QMP; hack to let us use BlockNodeInfoList internally - # - # Since: 8.0 - ## - { 'struct': 'DummyBlockCoreForceArrays', -- 'data': { 'unused-image-info': ['ImageInfo'] } } -+ 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } -diff --git a/qemu-img.c b/qemu-img.c -index 2f85bb7ede..3b2ca3bbcb 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) - g_free(sn_tab); - } - --static void dump_json_image_info_list(ImageInfoList *list) -+static void dump_json_block_node_info_list(BlockNodeInfoList *list) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_ImageInfoList(v, NULL, &list, &error_abort); -+ visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2832,13 +2832,13 @@ static void dump_json_image_info_list(ImageInfoList *list) - g_string_free(str, true); - } - --static void dump_json_image_info(ImageInfo *info) -+static void dump_json_block_node_info(BlockNodeInfo *info) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_ImageInfo(v, NULL, &info, &error_abort); -+ visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2848,9 +2848,9 @@ static void dump_json_image_info(ImageInfo *info) - g_string_free(str, true); - } - --static void dump_human_image_info_list(ImageInfoList *list) -+static void dump_human_image_info_list(BlockNodeInfoList *list) - { -- ImageInfoList *elem; -+ BlockNodeInfoList *elem; - bool delim = false; - - for (elem = list; elem; elem = elem->next) { -@@ -2859,7 +2859,7 @@ static void 
dump_human_image_info_list(ImageInfoList *list) - } - delim = true; - -- bdrv_image_info_dump(elem->value); -+ bdrv_node_info_dump(elem->value); - } - } - -@@ -2869,24 +2869,24 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) - } - - /** -- * Open an image file chain and return an ImageInfoList -+ * Open an image file chain and return an BlockNodeInfoList - * - * @filename: topmost image filename - * @fmt: topmost image format (may be NULL to autodetect) - * @chain: true - enumerate entire backing file chain - * false - only topmost image file - * -- * Returns a list of ImageInfo objects or NULL if there was an error opening an -- * image file. If there was an error a message will have been printed to -- * stderr. -+ * Returns a list of BlockNodeInfo objects or NULL if there was an error -+ * opening an image file. If there was an error a message will have been -+ * printed to stderr. - */ --static ImageInfoList *collect_image_info_list(bool image_opts, -- const char *filename, -- const char *fmt, -- bool chain, bool force_share) -+static BlockNodeInfoList *collect_image_info_list(bool image_opts, -+ const char *filename, -+ const char *fmt, -+ bool chain, bool force_share) - { -- ImageInfoList *head = NULL; -- ImageInfoList **tail = &head; -+ BlockNodeInfoList *head = NULL; -+ BlockNodeInfoList **tail = &head; - GHashTable *filenames; - Error *err = NULL; - -@@ -2895,7 +2895,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - while (filename) { - BlockBackend *blk; - BlockDriverState *bs; -- ImageInfo *info; -+ BlockNodeInfo *info; - - if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { - error_report("Backing file '%s' creates an infinite loop.", -@@ -2912,7 +2912,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - } - bs = blk_bs(blk); - -- bdrv_query_image_info(bs, &info, &err); -+ bdrv_query_block_node_info(bs, &info, &err); - if (err) { - error_report_err(err); - blk_unref(blk); -@@ 
-2945,7 +2945,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - return head; - - err: -- qapi_free_ImageInfoList(head); -+ qapi_free_BlockNodeInfoList(head); - g_hash_table_destroy(filenames); - return NULL; - } -@@ -2956,7 +2956,7 @@ static int img_info(int argc, char **argv) - OutputFormat output_format = OFORMAT_HUMAN; - bool chain = false; - const char *filename, *fmt, *output; -- ImageInfoList *list; -+ BlockNodeInfoList *list; - bool image_opts = false; - bool force_share = false; - -@@ -3035,14 +3035,14 @@ static int img_info(int argc, char **argv) - break; - case OFORMAT_JSON: - if (chain) { -- dump_json_image_info_list(list); -+ dump_json_block_node_info_list(list); - } else { -- dump_json_image_info(list->value); -+ dump_json_block_node_info(list->value); - } - break; - } - -- qapi_free_ImageInfoList(list); -+ qapi_free_BlockNodeInfoList(list); - return 0; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch deleted file mode 100644 index 693049c..0000000 --- a/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch +++ /dev/null @@ -1,70 +0,0 @@ -From d0d3d694b3a8d200442484ae0c9d263e0439cd04 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:53 +0100 -Subject: [PATCH 03/20] qemu-img bitmap: Report errors while closing the image -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [3/4] 4a704fec2e3bcb47b2be1529e27fd1833d58c517 (kmwolf/centos-qemu-kvm) - -blk_unref() can't report any errors that happen while closing the image. 
-For example, if qcow2 hits an -ENOSPC error while writing out dirty -bitmaps when it's closed, it prints error messages to stderr, but -'qemu-img bitmap' won't see any error return value and will therefore -look successful with exit code 0. - -In order to fix this, manually inactivate the image first before calling -blk_unref(). This already performs the operations that would be most -likely to fail while closing the image, but it can still return errors. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330 -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-4-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c) -Signed-off-by: Kevin Wolf ---- - qemu-img.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/qemu-img.c b/qemu-img.c -index 3cbdda9f76..2f85bb7ede 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -4646,6 +4646,7 @@ static int img_bitmap(int argc, char **argv) - QSIMPLEQ_HEAD(, ImgBitmapAction) actions; - ImgBitmapAction *act, *act_next; - const char *op; -+ int inactivate_ret; - - QSIMPLEQ_INIT(&actions); - -@@ -4830,6 +4831,16 @@ static int img_bitmap(int argc, char **argv) - ret = 0; - - out: -+ /* -+ * Manually inactivate the images first because this way we can know whether -+ * an error occurred. blk_unref() doesn't tell us about failures. 
-+ */ -+ inactivate_ret = bdrv_inactivate_all(); -+ if (inactivate_ret < 0) { -+ error_report("Error while closing the image: %s", strerror(-inactivate_ret)); -+ ret = 1; -+ } -+ - blk_unref(src); - blk_unref(blk); - qemu_opts_del(opts); --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch deleted file mode 100644 index 5cac3ba..0000000 --- a/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 2f5369f0effaa23be746f9b5d9f6a0bfc346fb7d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:52 +0100 -Subject: [PATCH 02/20] qemu-img commit: Report errors while closing the image - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [2/4] faedd43355463b1210a3f21ecd430f478bd06f5a (kmwolf/centos-qemu-kvm) - -blk_unref() can't report any errors that happen while closing the image. -For example, if qcow2 hits an -ENOSPC error while writing out dirty -bitmaps when it's closed, it prints error messages to stderr, but -'qemu-img commit' won't see any error return value and will therefore -look successful with exit code 0. - -In order to fix this, manually inactivate the image first before calling -blk_unref(). This already performs the operations that would be most -likely to fail while closing the image, but it can still return errors. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-3-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Signed-off-by: Kevin Wolf -(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3) -Signed-off-by: Kevin Wolf ---- - qemu-img.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/qemu-img.c b/qemu-img.c -index a9b3a8103c..3cbdda9f76 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -449,6 +449,11 @@ static BlockBackend *img_open(bool image_opts, - blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, - force_share); - } -+ -+ if (blk) { -+ blk_set_force_allow_inactivate(blk); -+ } -+ - return blk; - } - -@@ -1119,6 +1124,14 @@ unref_backing: - done: - qemu_progress_end(); - -+ /* -+ * Manually inactivate the image first because this way we can know whether -+ * an error occurred. blk_unref() doesn't tell us about failures. -+ */ -+ ret = bdrv_inactivate_all(); -+ if (ret < 0 && !local_err) { -+ error_setg_errno(&local_err, -ret, "Error while closing the image"); -+ } - blk_unref(blk); - - if (local_err) { --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch b/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch deleted file mode 100644 index 6b88e5c..0000000 --- a/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch +++ /dev/null @@ -1,166 +0,0 @@ -From 06030aa79fcb2d90d6a670e75d959aa0c3204b5c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:54 +0100 -Subject: [PATCH 04/20] qemu-iotests: Test qemu-img bitmap/commit exit code on - error - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [4/4] b96bb671bcfb7ae18015fda14db70f42a83a6ea7 (kmwolf/centos-qemu-kvm) - -This tests that when an error happens while writing back bitmaps to the -image 
file in qcow2_inactivate(), 'qemu-img bitmap/commit' actually -return an error value in their exit code instead of making the operation -look successful to scripts. - -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-5-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Signed-off-by: Kevin Wolf -(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65) -Signed-off-by: Kevin Wolf ---- - .../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++ - .../tests/qemu-img-close-errors.out | 23 +++++ - 2 files changed, 119 insertions(+) - create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors - create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out - -diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors -new file mode 100755 -index 0000000000..50bfb6cfa2 ---- /dev/null -+++ b/tests/qemu-iotests/tests/qemu-img-close-errors -@@ -0,0 +1,96 @@ -+#!/usr/bin/env bash -+# group: rw auto quick -+# -+# Check that errors while closing the image, in particular writing back dirty -+# bitmaps, is correctly reported with a failing qemu-img exit code. -+# -+# Copyright (C) 2023 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=kwolf@redhat.com -+ -+seq="$(basename $0)" -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! 
-+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt qcow2 -+_supported_proto file -+_supported_os Linux -+ -+size=1G -+ -+# The error we are going to use is ENOSPC. Depending on how many bitmaps we -+# create in the backing file (and therefore increase the used up space), we get -+# failures in different places. With a low number, only merging the bitmap -+# fails, whereas with a higher number, already 'qemu-img commit' fails. -+for max_bitmap in 6 7; do -+ echo -+ echo "=== Test with $max_bitmap bitmaps ===" -+ -+ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size -+ for i in $(seq 1 $max_bitmap); do -+ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i" -+ done -+ -+ # Simulate a block device of 128 MB by resizing the image file accordingly -+ # and then enforcing the size with the raw driver -+ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base" -+ BASE_JSON='json:{ -+ "driver": "qcow2", -+ "file": { -+ "driver": "raw", -+ "size": 134217728, -+ "file": { -+ "driver": "file", -+ "filename":"'"$TEST_IMG.base"'" -+ } -+ } -+ }' -+ -+ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT -+ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap" -+ -+ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io -+ -+ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids -+ echo "qemu-img commit exit code: ${PIPESTATUS[0]}" -+ -+ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap" -+ echo "qemu-img bitmap --add exit code: $?" 
-+ -+ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \ -+ "good-bitmap" 2>&1 | _filter_generated_node_ids -+ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}" -+done -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -+ -diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out -new file mode 100644 -index 0000000000..1bfe88f176 ---- /dev/null -+++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out -@@ -0,0 +1,23 @@ -+QA output created by qemu-img-close-errors -+ -+=== Test with 6 bitmaps === -+wrote 132120576/132120576 bytes at offset 0 -+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+Image committed. -+qemu-img commit exit code: 0 -+qemu-img bitmap --add exit code: 0 -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device -+qemu-img: Error while closing the image: Invalid argument -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device -+qemu-img bitmap --merge exit code: 1 -+ -+=== Test with 7 bitmaps === -+wrote 132120576/132120576 bytes at offset 0 -+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device -+qemu-img: Error while closing the image: Invalid argument -+qemu-img commit exit code: 1 -+qemu-img bitmap --add exit code: 0 -+qemu-img bitmap --merge exit code: 0 -+*** done --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch b/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch new file mode 
100644 index 0000000..6830692 --- /dev/null +++ b/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch @@ -0,0 +1,60 @@ +From 50c833fc3c7d8d3a5124cfdb2f2dc06b910c2252 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Thu, 3 Aug 2023 14:21:25 -0400 +Subject: [PATCH 11/14] qemu-options.hx: Update the reduced-phys-bits + documentation + +RH-Author: Bandan Das +RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter +RH-Bugzilla: 2214839 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] b0c4a19e9f4185c97ddf71857bc9367cea01ffa8 (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 + +commit 326e3015c4c6f3197157ea0bb00826ae740e2fad +Author: Tom Lendacky +Date: Fri Sep 30 10:14:28 2022 -0500 + + qemu-options.hx: Update the reduced-phys-bits documentation + + A guest only ever experiences, at most, 1 bit of reduced physical + addressing. Update the documentation to reflect this as well as change + the example value on the reduced-phys-bits option. + + Fixes: a9b4942f48 ("target/i386: add Secure Encrypted Virtualization (SEV) object") + Signed-off-by: Tom Lendacky + Reviewed-by: Dr. David Alan Gilbert + Message-Id: <13a62ced1808546c1d398e2025cf85f4c94ae123.1664550870.git.thomas.lendacky@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + qemu-options.hx | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/qemu-options.hx b/qemu-options.hx +index b18f933703..edf10a5aac 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5417,7 +5417,7 @@ SRST + physical address space. The ``reduced-phys-bits`` is used to + provide the number of bits we loose in physical address space. + Similar to C-bit, the value is Host family dependent. On EPYC, +- the value should be 5. ++ a guest will lose a maximum of 1 bit, so the value should be 1. 
+ + The ``sev-device`` provides the device file to use for + communicating with the SEV firmware running inside AMD Secure +@@ -5452,7 +5452,7 @@ SRST + + # |qemu_system_x86| \\ + ...... \\ +- -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=5 \\ ++ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \\ + -machine ...,memory-encryption=sev0 \\ + ..... + +-- +2.39.3 + diff --git a/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch deleted file mode 100644 index 25f30ff..0000000 --- a/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch +++ /dev/null @@ -1,146 +0,0 @@ -From aa61e4c437d29a791ea09a01f7230231f1e53356 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 05/12] qemu-thread-posix: cleanup, fix, document QemuEvent - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [2/9] c3bdf75f884e137c667316aaac96bb4a0b9ec2d9 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593 -Author: Paolo Bonzini -Date: Thu Mar 2 11:19:52 2023 +0100 - - qemu-thread-posix: cleanup, fix, document QemuEvent - - QemuEvent is currently broken on ARM due to missing memory barriers - after qatomic_*(). Apart from adding the memory barrier, a closer look - reveals some unpaired memory barriers too. Document more clearly what - is going on. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------ - 1 file changed, 49 insertions(+), 20 deletions(-) - -diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c -index bae938c670..cc74f4ede0 100644 ---- a/util/qemu-thread-posix.c -+++ b/util/qemu-thread-posix.c -@@ -379,13 +379,21 @@ void qemu_event_destroy(QemuEvent *ev) - - void qemu_event_set(QemuEvent *ev) - { -- /* qemu_event_set has release semantics, but because it *loads* -+ assert(ev->initialized); -+ -+ /* -+ * Pairs with both qemu_event_reset() and qemu_event_wait(). -+ * -+ * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ -- assert(ev->initialized); - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { -- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { -+ int old = qatomic_xchg(&ev->value, EV_SET); -+ -+ /* Pairs with memory barrier in kernel futex_wait system call. */ -+ smp_mb__after_rmw(); -+ if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - qemu_futex_wake(ev, INT_MAX); - } -@@ -394,18 +402,19 @@ void qemu_event_set(QemuEvent *ev) - - void qemu_event_reset(QemuEvent *ev) - { -- unsigned value; -- - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -- if (value == EV_SET) { -- /* -- * If there was a concurrent reset (or even reset+wait), -- * do nothing. Otherwise change EV_SET->EV_FREE. -- */ -- qatomic_or(&ev->value, EV_FREE); -- } -+ -+ /* -+ * If there was a concurrent reset (or even reset+wait), -+ * do nothing. Otherwise change EV_SET->EV_FREE. -+ */ -+ qatomic_or(&ev->value, EV_FREE); -+ -+ /* -+ * Order reset before checking the condition in the caller. -+ * Pairs with the first memory barrier in qemu_event_set(). 
-+ */ -+ smp_mb__after_rmw(); - } - - void qemu_event_wait(QemuEvent *ev) -@@ -413,20 +422,40 @@ void qemu_event_wait(QemuEvent *ev) - unsigned value; - - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -+ -+ /* -+ * qemu_event_wait must synchronize with qemu_event_set even if it does -+ * not go down the slow path, so this load-acquire is needed that -+ * synchronizes with the first memory barrier in qemu_event_set(). -+ * -+ * If we do go down the slow path, there is no requirement at all: we -+ * might miss a qemu_event_set() here but ultimately the memory barrier in -+ * qemu_futex_wait() will ensure the check is done correctly. -+ */ -+ value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { - /* -- * Leave the event reset and tell qemu_event_set that there -- * are waiters. No need to retry, because there cannot be -- * a concurrent busy->free transition. After the CAS, the -- * event will be either set or busy. -+ * Leave the event reset and tell qemu_event_set that there are -+ * waiters. No need to retry, because there cannot be a concurrent -+ * busy->free transition. After the CAS, the event will be either -+ * set or busy. -+ * -+ * This cmpxchg doesn't have particular ordering requirements if it -+ * succeeds (moving the store earlier can only cause qemu_event_set() -+ * to issue _more_ wakeups), the failing case needs acquire semantics -+ * like the load above. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - return; - } - } -+ -+ /* -+ * This is the final check for a concurrent set, so it does need -+ * a smp_mb() pairing with the second barrier of qemu_event_set(). -+ * The barrier is inside the FUTEX_WAIT system call. 
-+ */ - qemu_futex_wait(ev, EV_BUSY); - } - } --- -2.39.1 - diff --git a/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch deleted file mode 100644 index 631d541..0000000 --- a/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 02347869410fe53d814487501fb586f7dc614375 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 06/12] qemu-thread-win32: cleanup, fix, document QemuEvent - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [3/9] d228e9d6a4a75dd1f0a23a6dceaf4fea23d69192 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4 -Author: Paolo Bonzini -Date: Thu Mar 2 11:22:50 2023 +0100 - - qemu-thread-win32: cleanup, fix, document QemuEvent - - QemuEvent is currently broken on ARM due to missing memory barriers - after qatomic_*(). Apart from adding the memory barrier, a closer look - reveals some unpaired memory barriers that are not really needed and - complicated the functions unnecessarily. Also, it is relying on - a memory barrier in ResetEvent(); the barrier _ought_ to be there - but there is really no documentation about it, so make it explicit. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++------------- - 1 file changed, 56 insertions(+), 26 deletions(-) - -diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c -index 69db254ac7..a7fe3cc345 100644 ---- a/util/qemu-thread-win32.c -+++ b/util/qemu-thread-win32.c -@@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev) - void qemu_event_set(QemuEvent *ev) - { - assert(ev->initialized); -- /* qemu_event_set has release semantics, but because it *loads* -+ -+ /* -+ * Pairs with both qemu_event_reset() and qemu_event_wait(). -+ * -+ * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { -- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { -+ int old = qatomic_xchg(&ev->value, EV_SET); -+ -+ /* Pairs with memory barrier after ResetEvent. */ -+ smp_mb__after_rmw(); -+ if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - SetEvent(ev->event); - } -@@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev) - - void qemu_event_reset(QemuEvent *ev) - { -- unsigned value; -- - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -- if (value == EV_SET) { -- /* If there was a concurrent reset (or even reset+wait), -- * do nothing. Otherwise change EV_SET->EV_FREE. -- */ -- qatomic_or(&ev->value, EV_FREE); -- } -+ -+ /* -+ * If there was a concurrent reset (or even reset+wait), -+ * do nothing. Otherwise change EV_SET->EV_FREE. -+ */ -+ qatomic_or(&ev->value, EV_FREE); -+ -+ /* -+ * Order reset before checking the condition in the caller. -+ * Pairs with the first memory barrier in qemu_event_set(). 
-+ */ -+ smp_mb__after_rmw(); - } - - void qemu_event_wait(QemuEvent *ev) -@@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev) - unsigned value; - - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -+ -+ /* -+ * qemu_event_wait must synchronize with qemu_event_set even if it does -+ * not go down the slow path, so this load-acquire is needed that -+ * synchronizes with the first memory barrier in qemu_event_set(). -+ * -+ * If we do go down the slow path, there is no requirement at all: we -+ * might miss a qemu_event_set() here but ultimately the memory barrier in -+ * qemu_futex_wait() will ensure the check is done correctly. -+ */ -+ value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { -- /* qemu_event_set is not yet going to call SetEvent, but we are -- * going to do another check for EV_SET below when setting EV_BUSY. -- * At that point it is safe to call WaitForSingleObject. -+ /* -+ * Here the underlying kernel event is reset, but qemu_event_set is -+ * not yet going to call SetEvent. However, there will be another -+ * check for EV_SET below when setting EV_BUSY. At that point it -+ * is safe to call WaitForSingleObject. - */ - ResetEvent(ev->event); - -- /* Tell qemu_event_set that there are waiters. No need to retry -- * because there cannot be a concurrent busy->free transition. -- * After the CAS, the event will be either set or busy. -+ /* -+ * It is not clear whether ResetEvent provides this barrier; kernel -+ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! -+ */ -+ smp_mb(); -+ -+ /* -+ * Leave the event reset and tell qemu_event_set that there are -+ * waiters. No need to retry, because there cannot be a concurrent -+ * busy->free transition. After the CAS, the event will be either -+ * set or busy. 
- */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { -- value = EV_SET; -- } else { -- value = EV_BUSY; -+ return; - } - } -- if (value == EV_BUSY) { -- WaitForSingleObject(ev->event, INFINITE); -- } -+ -+ /* -+ * ev->value is now EV_BUSY. Since we didn't observe EV_SET, -+ * qemu_event_set() must observe EV_BUSY and call SetEvent(). -+ */ -+ WaitForSingleObject(ev->event, INFINITE); - } - } - --- -2.39.1 - diff --git a/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch b/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch new file mode 100644 index 0000000..4a4a2cc --- /dev/null +++ b/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch @@ -0,0 +1,54 @@ +From 936e21428a04524ccffeb36110d1aa61de9f44e5 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 11/21] raven: disable reentrancy detection for iomem + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/13] 48278583aa1ab08b912f49cd8b3a79d1bb3abf5f (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:12 2023 -0400 + + raven: disable reentrancy detection for iomem + + As the code is designed for re-entrant calls from raven_io_ops to + pci-conf, mark raven_io_ops as reentrancy-safe. 
+ + Signed-off-by: Alexander Bulekov + Message-Id: <20230427211013.2994127-8-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/pci-host/raven.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c +index 072ffe3c5e..9a11ac4b2b 100644 +--- a/hw/pci-host/raven.c ++++ b/hw/pci-host/raven.c +@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) + memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); + address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + ++ /* ++ * Raven's raven_io_ops use the address-space API to access pci-conf-idx ++ * (which is also owned by the raven device). As such, mark the ++ * pci_io_non_contiguous as re-entrancy safe. ++ */ ++ s->pci_io_non_contiguous.disable_reentrancy_guard = true; ++ + /* CPU address space */ + memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, + &s->pci_io); +-- +2.39.3 + diff --git a/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch b/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch deleted file mode 100644 index 1a2e863..0000000 --- a/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 546e4213c4e8a7b2e369315a71bc9aec091eed6e Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Mon, 19 Dec 2022 10:30:26 +0100 -Subject: redhat: fix virt-rhel9.2.0 compat props - -RH-Author: Cornelia Huck -RH-MergeRequest: 127: redhat: fix virt-rhel9.2.0 compat props -RH-Bugzilla: 2154640 -RH-Acked-by: Eric Auger -RH-Acked-by: Gavin Shan -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 49635fdc1d9a934ece78abd160b07c19909f876a (cohuck/qemu-kvm-c9s) - -We need to include arm_rhel_compat props in the latest machine. 
- -Signed-off-by: Cornelia Huck ---- - hw/arm/virt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 0a94f31dd1..bf18838b87 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3520,6 +3520,7 @@ type_init(rhel_machine_init); - - static void rhel920_virt_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - } - DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) - -@@ -3529,7 +3530,6 @@ static void rhel900_virt_options(MachineClass *mc) - - rhel920_virt_options(mc); - -- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ --- -2.38.1 - diff --git a/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch b/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch new file mode 100644 index 0000000..d0eb303 --- /dev/null +++ b/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch @@ -0,0 +1,220 @@ +From 41987ce0dd79d8734088002cbd34f20704dd017a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 17 Jul 2023 17:36:07 +0200 +Subject: [PATCH 04/12] s390x/ap: Wire up the device request notifier interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 185: Backport s390x fixes from QEMU 8.1 +RH-Jira: RHEL-794 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Commit: [1/1] ab6c912a1b8cdb584adacac16af79352fdfe7355 (clegoate/qemu-kvm-c9s) + +Jira: https://issues.redhat.com/browse/RHEL-794 + +commit 1360b2ad1f673d32a09de5826cd71ecd0510164a +Author: Tony Krowiak +Date: Fri Jun 2 10:11:25 2023 -0400 + + s390x/ap: Wire up the device request notifier interface + + Let's wire up the device request notifier interface to 
handle device unplug + requests for AP. + + Signed-off-by: Tony Krowiak + Link: https://lore.kernel.org/qemu-devel/20230530225544.280031-1-akrowiak@linux.ibm.com/ + Signed-off-by: Cédric Le Goater + +Backport note: + + - linux-headers/linux/vfio.h + updated to v6.5-rc1 level for VFIO_AP_REQ_IRQ_INDEX definition + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/ap.c | 113 +++++++++++++++++++++++++++++++++++++ + linux-headers/linux/vfio.h | 9 +++ + 2 files changed, 122 insertions(+) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index e0dd561e85..6e21d1da5a 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -18,6 +18,8 @@ + #include "hw/vfio/vfio-common.h" + #include "hw/s390x/ap-device.h" + #include "qemu/error-report.h" ++#include "qemu/event_notifier.h" ++#include "qemu/main-loop.h" + #include "qemu/module.h" + #include "qemu/option.h" + #include "qemu/config-file.h" +@@ -33,6 +35,7 @@ + struct VFIOAPDevice { + APDevice apdev; + VFIODevice vdev; ++ EventNotifier req_notifier; + }; + + OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) +@@ -84,10 +87,110 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) + return vfio_get_group(groupid, &address_space_memory, errp); + } + ++static void vfio_ap_req_notifier_handler(void *opaque) ++{ ++ VFIOAPDevice *vapdev = opaque; ++ Error *err = NULL; ++ ++ if (!event_notifier_test_and_clear(&vapdev->req_notifier)) { ++ return; ++ } ++ ++ qdev_unplug(DEVICE(vapdev), &err); ++ ++ if (err) { ++ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); ++ } ++} ++ ++static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, ++ unsigned int irq, Error **errp) ++{ ++ int fd; ++ size_t argsz; ++ IOHandler *fd_read; ++ EventNotifier *notifier; ++ struct vfio_irq_info *irq_info; ++ VFIODevice *vdev = &vapdev->vdev; ++ ++ switch (irq) { ++ case VFIO_AP_REQ_IRQ_INDEX: ++ notifier = &vapdev->req_notifier; ++ fd_read = vfio_ap_req_notifier_handler; ++ break; ++ default: ++ error_setg(errp, "vfio: Unsupported 
device irq(%d)", irq); ++ return; ++ } ++ ++ if (vdev->num_irqs < irq + 1) { ++ error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", ++ irq, vdev->num_irqs); ++ return; ++ } ++ ++ argsz = sizeof(*irq_info); ++ irq_info = g_malloc0(argsz); ++ irq_info->index = irq; ++ irq_info->argsz = argsz; ++ ++ if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, ++ irq_info) < 0 || irq_info->count < 1) { ++ error_setg_errno(errp, errno, "vfio: Error getting irq info"); ++ goto out_free_info; ++ } ++ ++ if (event_notifier_init(notifier, 0)) { ++ error_setg_errno(errp, errno, ++ "vfio: Unable to init event notifier for irq (%d)", ++ irq); ++ goto out_free_info; ++ } ++ ++ fd = event_notifier_get_fd(notifier); ++ qemu_set_fd_handler(fd, fd_read, NULL, vapdev); ++ ++ if (vfio_set_irq_signaling(vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, ++ errp)) { ++ qemu_set_fd_handler(fd, NULL, NULL, vapdev); ++ event_notifier_cleanup(notifier); ++ } ++ ++out_free_info: ++ g_free(irq_info); ++ ++} ++ ++static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, ++ unsigned int irq) ++{ ++ Error *err = NULL; ++ EventNotifier *notifier; ++ ++ switch (irq) { ++ case VFIO_AP_REQ_IRQ_INDEX: ++ notifier = &vapdev->req_notifier; ++ break; ++ default: ++ error_report("vfio: Unsupported device irq(%d)", irq); ++ return; ++ } ++ ++ if (vfio_set_irq_signaling(&vapdev->vdev, irq, 0, ++ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { ++ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); ++ } ++ ++ qemu_set_fd_handler(event_notifier_get_fd(notifier), ++ NULL, NULL, vapdev); ++ event_notifier_cleanup(notifier); ++} ++ + static void vfio_ap_realize(DeviceState *dev, Error **errp) + { + int ret; + char *mdevid; ++ Error *err = NULL; + VFIOGroup *vfio_group; + APDevice *apdev = AP_DEVICE(dev); + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); +@@ -116,6 +219,15 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) + goto out_get_dev_err; + } + ++ vfio_ap_register_irq_notifier(vapdev, 
VFIO_AP_REQ_IRQ_INDEX, &err); ++ if (err) { ++ /* ++ * Report this error, but do not make it a failing condition. ++ * Lack of this IRQ in the host does not prevent normal operation. ++ */ ++ error_report_err(err); ++ } ++ + return; + + out_get_dev_err: +@@ -129,6 +241,7 @@ static void vfio_ap_unrealize(DeviceState *dev) + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); + VFIOGroup *group = vapdev->vdev.group; + ++ vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); + vfio_ap_put_device(vapdev); + vfio_put_group(group); + } +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index c59692ce0b..ce464957c8 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -642,6 +642,15 @@ enum { + VFIO_CCW_NUM_IRQS + }; + ++/* ++ * The vfio-ap bus driver makes use of the following IRQ index mapping. ++ * Unimplemented IRQ types return a count of zero. ++ */ ++enum { ++ VFIO_AP_REQ_IRQ_INDEX, ++ VFIO_AP_NUM_IRQS ++}; ++ + /** + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, + * struct vfio_pci_hot_reset_info) +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch b/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch deleted file mode 100644 index 8bf1f61..0000000 --- a/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch +++ /dev/null @@ -1,125 +0,0 @@ -From ed90f91b61844abd2dff2eb970f721a6cf072235 Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 28 Oct 2022 15:47:57 -0400 -Subject: [PATCH 6/9] s390x/pci: coalesce unmap operations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 80c3a2c1d720057ae2a80b338ea06c9c6c804532 (clegoate/qemu-kvm-c9s) - -Currently, each unmapped page 
is handled as an individual iommu -region notification. Attempt to group contiguous unmap operations -into fewer notifications to reduce overhead. - -Signed-off-by: Matthew Rosato -Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -Signed-off-by: Thomas Huth -(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 51 insertions(+) - -diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c -index 7cc4bcf850..66e764f901 100644 ---- a/hw/s390x/s390-pci-inst.c -+++ b/hw/s390x/s390-pci-inst.c -@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - } - g_hash_table_remove(iommu->iotlb, &entry->iova); - inc_dma_avail(iommu); -+ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */ -+ goto out; - } else { - if (cache) { - if (cache->perm == entry->perm && -@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - dec_dma_avail(iommu); - } - -+ /* -+ * All associated iotlb entries have already been cleared, trigger the -+ * unmaps. -+ */ - memory_region_notify_iommu(&iommu->iommu_mr, 0, event); - - out: - return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; - } - -+static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova, -+ uint64_t len) -+{ -+ uint64_t remain = len, start = iova, end = start + len - 1, mask, size; -+ IOMMUTLBEvent event = { -+ .type = IOMMU_NOTIFIER_UNMAP, -+ .entry = { -+ .target_as = &address_space_memory, -+ .translated_addr = 0, -+ .perm = IOMMU_NONE, -+ }, -+ }; -+ -+ while (remain >= TARGET_PAGE_SIZE) { -+ mask = dma_aligned_pow2_mask(start, end, 64); -+ size = mask + 1; -+ event.entry.iova = start; -+ event.entry.addr_mask = mask; -+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); -+ start += size; -+ remain -= size; -+ } -+} -+ - int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - { - CPUS390XState *env = &cpu->env; -+ uint64_t iova, coalesce = 0; - uint32_t fh; - uint16_t error = 0; - S390PCIBusDevice *pbdev; -@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - break; - } - -+ /* -+ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps -+ * into as few notifier events as possible. 
-+ */ -+ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) { -+ if (coalesce == 0) { -+ iova = entry.iova; -+ } -+ coalesce += entry.len; -+ } else if (coalesce > 0) { -+ /* Unleash the coalesced unmap before processing a new map */ -+ s390_pci_batch_unmap(iommu, iova, coalesce); -+ coalesce = 0; -+ } -+ - start += entry.len; - while (entry.iova < start && entry.iova < end) { - if (dma_avail > 0 || entry.perm == IOMMU_NONE) { -@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - } - } - } -+ if (coalesce) { -+ /* Unleash the coalesced unmap before finishing rpcit */ -+ s390_pci_batch_unmap(iommu, iova, coalesce); -+ coalesce = 0; -+ } - if (again && dma_avail > 0) - goto retry; - err: --- -2.31.1 - diff --git a/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch b/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch deleted file mode 100644 index bbe2595..0000000 --- a/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 1ed1f8fc20a4883bc0bc1f58d299b0278abc5442 Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 9 Dec 2022 14:57:00 -0500 -Subject: [PATCH 8/9] s390x/pci: reset ISM passthrough devices on shutdown and - system reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] c531352b9d57f51ba938d4c46ee19a5706ade697 (clegoate/qemu-kvm-c9s) - -ISM device firmware stores unique state information that can -can cause a wholesale unmap of the associated IOMMU (e.g. 
when -we get a termination signal for QEMU) to trigger firmware errors -because firmware believes we are attempting to invalidate entries -that are still in-use by the guest OS (when in fact that guest is -in the process of being terminated or rebooted). -To alleviate this, register both a shutdown notifier (for unexpected -termination cases e.g. virsh destroy) as well as a reset callback -(for cases like guest OS reboot). For each of these scenarios, trigger -PCI device reset; this is enough to indicate to firmware that the IOMMU -is no longer in-use by the guest OS, making it safe to invalidate any -associated IOMMU entries. - -Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") -Signed-off-by: Matthew Rosato -Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context] -Signed-off-by: Thomas Huth -(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ - hw/s390x/s390-pci-vfio.c | 2 ++ - include/hw/s390x/s390-pci-bus.h | 5 +++++ - 3 files changed, 35 insertions(+) - -diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c -index 977e7daa15..02751f3597 100644 ---- a/hw/s390x/s390-pci-bus.c -+++ b/hw/s390x/s390-pci-bus.c -@@ -24,6 +24,8 @@ - #include "hw/pci/msi.h" - #include "qemu/error-report.h" - #include "qemu/module.h" -+#include "sysemu/reset.h" -+#include "sysemu/runstate.h" - - #ifndef DEBUG_S390PCI_BUS - #define DEBUG_S390PCI_BUS 0 -@@ -150,10 +152,30 @@ out: - psccb->header.response_code = cpu_to_be16(rc); - } - -+static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) -+{ -+ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, -+ shutdown_notifier); -+ -+ pci_device_reset(pbdev->pdev); -+} -+ -+static void s390_pci_reset_cb(void *opaque) -+{ -+ S390PCIBusDevice *pbdev = opaque; -+ -+ 
pci_device_reset(pbdev->pdev); -+} -+ - static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) - { - HotplugHandler *hotplug_ctrl; - -+ if (pbdev->pft == ZPCI_PFT_ISM) { -+ notifier_remove(&pbdev->shutdown_notifier); -+ qemu_unregister_reset(s390_pci_reset_cb, pbdev); -+ } -+ - /* Unplug the PCI device */ - if (pbdev->pdev) { - DeviceState *pdev = DEVICE(pbdev->pdev); -@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - pbdev->fh |= FH_SHM_VFIO; - pbdev->forwarding_assist = false; - } -+ /* Register shutdown notifier and reset callback for ISM devices */ -+ if (pbdev->pft == ZPCI_PFT_ISM) { -+ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; -+ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); -+ qemu_register_reset(s390_pci_reset_cb, pbdev); -+ } - } else { - pbdev->fh |= FH_SHM_EMUL; - /* Always intercept emulated devices */ -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index f7bf36cec8..f51190d466 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - /* The following values remain 0 until we support other FMB formats */ - pbdev->zpci_fn.fmbl = 0; - pbdev->zpci_fn.pft = 0; -+ /* Store function type separately for type-specific behavior */ -+ pbdev->pft = cap->pft; - - /* - * If appropriate, reduce the size of the supported DMA aperture reported -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 1c46e3a269..e0a9f9385b 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -39,6 +39,9 @@ - #define UID_CHECKING_ENABLED 0x01 - #define ZPCI_DTSM 0x40 - -+/* zPCI Function Types */ -+#define ZPCI_PFT_ISM 5 -+ - OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) - OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) - OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) -@@ -344,6 +347,7 
@@ struct S390PCIBusDevice { - uint16_t noi; - uint16_t maxstbl; - uint8_t sum; -+ uint8_t pft; - S390PCIGroup *pci_group; - ClpRspQueryPci zpci_fn; - S390MsixInfo msix; -@@ -352,6 +356,7 @@ struct S390PCIBusDevice { - MemoryRegion msix_notify_mr; - IndAddr *summary_ind; - IndAddr *indicator; -+ Notifier shutdown_notifier; - bool pci_unplug_request_processed; - bool unplug_requested; - bool interp; --- -2.31.1 - diff --git a/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch b/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch deleted file mode 100644 index 0992724..0000000 --- a/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ee69c8c57fe62fc200f749c4ce3927c88803644d Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 28 Oct 2022 15:47:58 -0400 -Subject: [PATCH 7/9] s390x/pci: shrink DMA aperture to be bound by vfio DMA - limit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] 0956bbb4773dd0085f6aed59d6284c704b4fed3b (clegoate/qemu-kvm-c9s) - -Currently, s390x-pci performs accounting against the vfio DMA -limit and triggers the guest to clean up mappings when the limit -is reached. Let's go a step further and also limit the size of -the supported DMA aperture reported to the guest based upon the -initial vfio DMA limit reported for the container (if less than -than the size reported by the firmware/host zPCI layer). 
This -avoids processing sections of the guest DMA table during global -refresh that, for common use cases, will never be used anway, and -makes exhausting the vfio DMA limit due to mismatch between guest -aperture size and host limit far less likely and more indicitive -of an error. - -Signed-off-by: Matthew Rosato -Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -Signed-off-by: Thomas Huth -(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-vfio.c | 11 +++++++++++ - include/hw/s390x/s390-pci-bus.h | 1 + - 2 files changed, 12 insertions(+) - -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index 5f0adb0b4a..f7bf36cec8 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, - cnt->users = 1; - cnt->avail = avail; - QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); -+ pbdev->iommu->max_dma_limit = avail; - return cnt; - } - -@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - struct vfio_info_cap_header *hdr; - struct vfio_device_info_cap_zpci_base *cap; - VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); -+ uint64_t vfio_size; - - hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); - -@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - /* The following values remain 0 until we support other FMB formats */ - pbdev->zpci_fn.fmbl = 0; - pbdev->zpci_fn.pft = 0; -+ -+ /* -+ * If appropriate, reduce the size of the supported DMA aperture reported -+ * to the guest based upon the vfio DMA limit. 
-+ */ -+ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; -+ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) { -+ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; -+ } - } - - static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 0605fcea24..1c46e3a269 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -278,6 +278,7 @@ struct S390PCIIOMMU { - uint64_t g_iota; - uint64_t pba; - uint64_t pal; -+ uint64_t max_dma_limit; - GHashTable *iotlb; - S390PCIDMACount *dma_limit; - }; --- -2.31.1 - diff --git a/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch b/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch new file mode 100644 index 0000000..ecf1353 --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch @@ -0,0 +1,129 @@ +From 3cab2a638a10ece2b76d9f33a3c5dc6f64f1bbaa Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Wed, 10 May 2023 12:55:31 +0200 +Subject: [PATCH 21/21] s390x/pv: Fix spurious warning with asynchronous + teardown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests +RH-Bugzilla: 2168500 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cédric Le Goater +RH-Commit: [2/2] cb690d3155ea22c6df00a4d75b72f501515e5556 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 + +Kernel commit 292a7d6fca33 ("KVM: s390: pv: fix asynchronous teardown +for small VMs") causes the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl to fail +if the VM is not larger than 2GiB. QEMU would attempt it and fail, +print an error message, and then proceed with a normal teardown. 
+ +Avoid attempting to use asynchronous teardown altogether when the VM is +not larger than 2 GiB. This will avoid triggering the error message and +also avoid pointless overhead; normal teardown is fast enough for small +VMs. + +Reported-by: Marc Hartmayer +Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot") +Link: https://lore.kernel.org/all/20230421085036.52511-2-imbrenda@linux.ibm.com/ +Signed-off-by: Claudio Imbrenda +Message-Id: <20230510105531.30623-2-imbrenda@linux.ibm.com> +Reviewed-by: Thomas Huth +[thuth: Fix inline function parameter in pv.h] +Signed-off-by: Thomas Huth +(cherry picked from commit 88693ab2a53f2f3d25cb39a7b5034ab391bc5a81) +--- + hw/s390x/pv.c | 10 ++++++++-- + hw/s390x/s390-virtio-ccw.c | 2 +- + include/hw/s390x/pv.h | 6 +++--- + 3 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 49ea38236c..b63f3784c6 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -13,6 +13,7 @@ + + #include + ++#include "qemu/units.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/kvm.h" +@@ -115,7 +116,7 @@ static void *s390_pv_do_unprot_async_fn(void *p) + return NULL; + } + +-bool s390_pv_vm_try_disable_async(void) ++bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) + { + /* + * t is only needed to create the thread; once qemu_thread_create +@@ -123,7 +124,12 @@ bool s390_pv_vm_try_disable_async(void) + */ + QemuThread t; + +- if (!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { ++ /* ++ * If the feature is not present or if the VM is not larger than 2 GiB, ++ * KVM_PV_ASYNC_CLEANUP_PREPARE will fail; no point in attempting it.
++ */ ++ if ((MACHINE(ms)->maxram_size <= 2 * GiB) || ++ !kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { + return false; + } + if (s390_pv_cmd(KVM_PV_ASYNC_CLEANUP_PREPARE, NULL) != 0) { +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 6a0b93c63d..d95c595f88 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -330,7 +330,7 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) + + static void s390_machine_unprotect(S390CcwMachineState *ms) + { +- if (!s390_pv_vm_try_disable_async()) { ++ if (!s390_pv_vm_try_disable_async(ms)) { + s390_pv_vm_disable(); + } + ms->pv = false; +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index 966306a9db..7b935e2246 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -14,10 +14,10 @@ + + #include "qapi/error.h" + #include "sysemu/kvm.h" ++#include "hw/s390x/s390-virtio-ccw.h" + + #ifdef CONFIG_KVM + #include "cpu.h" +-#include "hw/s390x/s390-virtio-ccw.h" + + static inline bool s390_is_pv(void) + { +@@ -41,7 +41,7 @@ static inline bool s390_is_pv(void) + int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); +-bool s390_pv_vm_try_disable_async(void); ++bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); + int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); + int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); + void s390_pv_prep_reset(void); +@@ -61,7 +61,7 @@ static inline bool s390_is_pv(void) { return false; } + static inline int s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} +-static inline bool s390_pv_vm_try_disable_async(void) { return false; } ++static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } + static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } + static 
inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } + static inline void s390_pv_prep_reset(void) {} +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch b/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch deleted file mode 100644 index c3383af..0000000 --- a/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 9452246e59a5f16f44fdf9a7d514b947faf1d5fc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 16 Jan 2023 18:46:05 +0100 -Subject: [PATCH 5/9] s390x/pv: Implement a CGS check helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 139: s390x/pv: Implement a CGS check helper -RH-Bugzilla: 2122523 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Commit: [1/1] 8551ce772b10de653b4e1c8be60aae60ec98b421 (clegoate/qemu-kvm-c9s) - -When a protected VM is started with the maximum number of CPUs (248), -the service call providing information on the CPUs requires more -buffer space than allocated and QEMU disgracefully aborts : - - LOADPARM=[........] - Using virtio-blk. - Using SCSI scheme. - ................................................................................... - qemu-system-s390x: KVM_S390_MEM_OP failed: Argument list too long - -When protected virtualization is initialized, compute the maximum -number of vCPUs supported by the machine and return useful information -to the user before the machine starts in case of error. 
- -Suggested-by: Thomas Huth -Reviewed-by: Thomas Huth -Signed-off-by: Cédric Le Goater -Message-Id: <20230116174607.2459498-2-clg@kaod.org> -Signed-off-by: Thomas Huth -(cherry picked from commit 75d7150c636569f6687f7e70a33be893be43eb5f) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/pv.c | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index 8dfe92d8df..8a1c71436b 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -20,6 +20,7 @@ - #include "exec/confidential-guest-support.h" - #include "hw/s390x/ipl.h" - #include "hw/s390x/pv.h" -+#include "hw/s390x/sclp.h" - #include "target/s390x/kvm/kvm_s390x.h" - - static bool info_valid; -@@ -249,6 +250,41 @@ struct S390PVGuestClass { - ConfidentialGuestSupportClass parent_class; - }; - -+/* -+ * If protected virtualization is enabled, the amount of data that the -+ * Read SCP Info Service Call can use is limited to one page. The -+ * available space also depends on the Extended-Length SCCB (ELS) -+ * feature which can take more buffer space to store feature -+ * information. This impacts the maximum number of CPUs supported in -+ * the machine. -+ */ -+static uint32_t s390_pv_get_max_cpus(void) -+{ -+ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ? 
-+ offsetof(ReadInfo, entries) : SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET; -+ -+ return (TARGET_PAGE_SIZE - offset_cpu) / sizeof(CPUEntry); -+} -+ -+static bool s390_pv_check_cpus(Error **errp) -+{ -+ MachineState *ms = MACHINE(qdev_get_machine()); -+ uint32_t pv_max_cpus = s390_pv_get_max_cpus(); -+ -+ if (ms->smp.max_cpus > pv_max_cpus) { -+ error_setg(errp, "Protected VMs support a maximum of %d CPUs", -+ pv_max_cpus); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) -+{ -+ return s390_pv_check_cpus(errp); -+} -+ - int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - { - if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { -@@ -261,6 +297,10 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - return -1; - } - -+ if (!s390_pv_guest_check(cgs, errp)) { -+ return -1; -+ } -+ - cgs->ready = true; - - return 0; --- -2.31.1 - diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch deleted file mode 100644 index 42114a1..0000000 --- a/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 51fcf352a97f2e99a6a3fb8ae663b45436304120 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 10 Jan 2023 14:25:34 +0100 -Subject: [PATCH 11/31] s390x/s390-virtio-ccw: Activate zPCI features on - s390-ccw-virtio-rhel8.6.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 133: s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 -RH-Bugzilla: 2159408 -RH-Acked-by: Thomas Huth -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 1ed82e56fe74a283a1726c4893dc3387e645072c (clegoate/qemu-kvm-c9s) - -commit c7b14d3af7 ("s390x/s390-virtio-ccw: Switch off zPCI 
enhancements -on older machines") activated zPCI enhancement features (interpretation -and forward assist) silently on the s390-ccw-virtio-rhel8.6.0 machine -for RHEL8.8. It didn't seem to be a problem since migration is not -possible but it broke LEAPP upgrade to RHEL9 when the machine is -defined with a passthrough device. Activate the zPCI features also on -RHEL9.2 for the machines to be alike in both latest RHEL distros. - -Upstream Status: RHEL-only -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2159408 - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-virtio-ccw.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index aa142a1a4e..4cdd59c394 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1234,8 +1234,14 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) - - static void ccw_machine_rhel860_class_options(MachineClass *mc) - { -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "on", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", }, -+ }; -+ - ccw_machine_rhel900_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - - /* All RHEL machines for prior major releases are deprecated */ - mc->deprecation_reason = rhel_old_machine_deprecation; -@@ -1259,8 +1265,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) - - static void ccw_machine_rhel850_class_options(MachineClass *mc) - { -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, -+ }; -+ - ccw_machine_rhel860_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - 
mc->smp_props.prefer_sockets = true; - } - DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); --- -2.31.1 - diff --git a/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch b/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch new file mode 100644 index 0000000..11dda3a --- /dev/null +++ b/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch @@ -0,0 +1,81 @@ +From 5dd7d26c034c26b2d4d9b91b8d1a7b605e19730f Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 12 Jul 2023 15:43:51 +0200 +Subject: [PATCH 02/12] scsi: cleanup scsi_clear_unit_attention() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention +RH-Bugzilla: 2176702 +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/3] b3a06a91644e44fae3d76d0fbe72448652db517a (sgarzarella/qemu-kvm-c-9-s) + +The previous commit moved the unit attention clearing when we create +the request. So now we can clean scsi_clear_unit_attention() to handle +only the case of the REPORT LUNS command: this is the only case in +which a UNIT ATTENTION is cleared without having been reported. 
+ +Suggested-by: Paolo Bonzini +Signed-off-by: Stefano Garzarella +Message-ID: <20230712134352.118655-3-sgarzare@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ba947dab98e7cd4337c70975bd255701a2a6aad8) +Signed-off-by: Stefano Garzarella +--- + hw/scsi/scsi-bus.c | 28 ++++++---------------------- + 1 file changed, 6 insertions(+), 22 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index 5d22313b9d..cecd26479e 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -828,26 +828,12 @@ static void scsi_clear_unit_attention(SCSIRequest *req) + return; + } + +- if (req->dev->unit_attention.key != UNIT_ATTENTION && +- req->bus->unit_attention.key != UNIT_ATTENTION) { +- return; +- } +- +- /* +- * If an INQUIRY command enters the enabled command state, +- * the device server shall [not] clear any unit attention condition; +- * See also MMC-6, paragraphs 6.5 and 6.6.2. +- */ +- if (req->cmd.buf[0] == INQUIRY || +- req->cmd.buf[0] == GET_CONFIGURATION || +- req->cmd.buf[0] == GET_EVENT_STATUS_NOTIFICATION) { +- return; +- } +- + if (req->dev->unit_attention.key == UNIT_ATTENTION) { + ua = &req->dev->unit_attention; +- } else { ++ } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { + ua = &req->bus->unit_attention; ++ } else { ++ return; + } + + /* +@@ -856,12 +842,10 @@ static void scsi_clear_unit_attention(SCSIRequest *req) + * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. 
+ */ + if (req->cmd.buf[0] == REPORT_LUNS && +- !(ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && +- ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq)) { +- return; ++ ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && ++ ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { ++ *ua = SENSE_CODE(NO_SENSE); + } +- +- *ua = SENSE_CODE(NO_SENSE); + } + + int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len) +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch b/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch new file mode 100644 index 0000000..cb3b24e --- /dev/null +++ b/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch @@ -0,0 +1,110 @@ +From 0a784c45a7b7ee32c36bf86eebb24c8431a89f49 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 12 Jul 2023 15:43:52 +0200 +Subject: [PATCH 03/12] scsi: clear unit attention only for REPORT LUNS + commands + +RH-Author: Stefano Garzarella +RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention +RH-Bugzilla: 2176702 +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/3] 01d5e112ef9ae204d96ceb01b4a453fdb4e8b669 (sgarzarella/qemu-kvm-c-9-s) + +scsi_clear_unit_attention() now only handles REPORTED LUNS DATA HAS +CHANGED. + +This only happens when we handle REPORT LUNS commands, so let's rename +the function in scsi_clear_reported_luns_changed() and call it only in +scsi_target_emulate_report_luns(). 
+ +Suggested-by: Paolo Bonzini +Signed-off-by: Stefano Garzarella +Message-ID: <20230712134352.118655-4-sgarzare@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 2eb5599e8a73e70a9e86a97120818ff95a43a23a) +Signed-off-by: Stefano Garzarella +--- + hw/scsi/scsi-bus.c | 34 +++++++++++----------------------- + 1 file changed, 11 insertions(+), 23 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index cecd26479e..9542410800 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -22,6 +22,7 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev); + static void scsi_req_dequeue(SCSIRequest *req); + static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len); + static void scsi_target_free_buf(SCSIRequest *req); ++static void scsi_clear_reported_luns_changed(SCSIRequest *req); + + static int next_scsi_bus; + +@@ -518,6 +519,14 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r) + + /* store the LUN list length */ + stl_be_p(&r->buf[0], len - 8); ++ ++ /* ++ * If a REPORT LUNS command enters the enabled command state, [...] ++ * the device server shall clear any pending unit attention condition ++ * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. ++ */ ++ scsi_clear_reported_luns_changed(&r->req); ++ + return true; + } + +@@ -816,18 +825,10 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) + return req->ops->get_buf(req); + } + +-static void scsi_clear_unit_attention(SCSIRequest *req) ++static void scsi_clear_reported_luns_changed(SCSIRequest *req) + { + SCSISense *ua; + +- /* +- * scsi_fetch_unit_attention_sense() already cleaned the unit attention +- * in this case. 
+- */ +- if (req->ops == &reqops_unit_attention) { +- return; +- } +- + if (req->dev->unit_attention.key == UNIT_ATTENTION) { + ua = &req->dev->unit_attention; + } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { +@@ -836,13 +837,7 @@ static void scsi_clear_unit_attention(SCSIRequest *req) + return; + } + +- /* +- * If a REPORT LUNS command enters the enabled command state, [...] +- * the device server shall clear any pending unit attention condition +- * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. +- */ +- if (req->cmd.buf[0] == REPORT_LUNS && +- ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && ++ if (ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && + ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { + *ua = SENSE_CODE(NO_SENSE); + } +@@ -1528,13 +1523,6 @@ void scsi_req_complete(SCSIRequest *req, int status) + req->dev->sense_is_ua = false; + } + +- /* +- * Unit attention state is now stored in the device's sense buffer +- * if the HBA didn't do autosense. Clear the pending unit attention +- * flags. 
+- */ +- scsi_clear_unit_attention(req); +- + scsi_req_ref(req); + scsi_req_dequeue(req); + req->bus->info->complete(req, req->residual); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch b/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch new file mode 100644 index 0000000..a41ae82 --- /dev/null +++ b/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch @@ -0,0 +1,132 @@ +From 562ea3a2d602cf41c548f3ddf52c43c04fded347 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 12 Jul 2023 15:43:50 +0200 +Subject: [PATCH 01/12] scsi: fetch unit attention when creating the request + +RH-Author: Stefano Garzarella +RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention +RH-Bugzilla: 2176702 +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/3] 04563caac45d0110ea65eda8e55472556cd317c0 (sgarzarella/qemu-kvm-c-9-s) + +Commit 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") split +calls to scsi_req_new() and scsi_req_enqueue() in the virtio-scsi device. +No ill effects were observed until commit 8cc5583abe ("virtio-scsi: Send +"REPORTED LUNS CHANGED" sense data upon disk hotplug events") added a +unit attention that was easy to trigger with device hotplug and +hot-unplug. + +Because the two calls were separated, all requests in the batch were +prepared calling scsi_req_new() to report a sense. The first one +submitted would report the right sense and reset it to NO_SENSE, while +the others reported CHECK_CONDITION with no sense data. This caused +SCSI errors in Linux. + +To solve this issue, let's fetch the unit attention as early as possible +when we prepare the request, so that only the first request in the batch +will use the unit attention SCSIReqOps and the others will not report +CHECK CONDITION. 
+ +Fixes: 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") +Fixes: 8cc5583abe ("virtio-scsi: Send "REPORTED LUNS CHANGED" sense data upon disk hotplug events") +Reported-by: Thomas Huth +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2176702 +Co-developed-by: Paolo Bonzini +Signed-off-by: Stefano Garzarella +Message-ID: <20230712134352.118655-2-sgarzare@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9472083e642bfb9bc836b38662baddd9bc964ebc) +Signed-off-by: Stefano Garzarella +--- + hw/scsi/scsi-bus.c | 36 +++++++++++++++++++++++++++++++++--- + include/hw/scsi/scsi.h | 1 + + 2 files changed, 34 insertions(+), 3 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index 3c20b47ad0..5d22313b9d 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -413,19 +413,35 @@ static const struct SCSIReqOps reqops_invalid_opcode = { + + /* SCSIReqOps implementation for unit attention conditions. */ + +-static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) ++static void scsi_fetch_unit_attention_sense(SCSIRequest *req) + { ++ SCSISense *ua = NULL; ++ + if (req->dev->unit_attention.key == UNIT_ATTENTION) { +- scsi_req_build_sense(req, req->dev->unit_attention); ++ ua = &req->dev->unit_attention; + } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { +- scsi_req_build_sense(req, req->bus->unit_attention); ++ ua = &req->bus->unit_attention; + } ++ ++ /* ++ * Fetch the unit attention sense immediately so that another ++ * scsi_req_new does not use reqops_unit_attention. 
++ */ ++ if (ua) { ++ scsi_req_build_sense(req, *ua); ++ *ua = SENSE_CODE(NO_SENSE); ++ } ++} ++ ++static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) ++{ + scsi_req_complete(req, CHECK_CONDITION); + return 0; + } + + static const struct SCSIReqOps reqops_unit_attention = { + .size = sizeof(SCSIRequest), ++ .init_req = scsi_fetch_unit_attention_sense, + .send_command = scsi_unit_attention + }; + +@@ -699,6 +715,11 @@ SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d, + object_ref(OBJECT(d)); + object_ref(OBJECT(qbus->parent)); + notifier_list_init(&req->cancel_notifiers); ++ ++ if (reqops->init_req) { ++ reqops->init_req(req); ++ } ++ + trace_scsi_req_alloc(req->dev->id, req->lun, req->tag); + return req; + } +@@ -798,6 +819,15 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) + static void scsi_clear_unit_attention(SCSIRequest *req) + { + SCSISense *ua; ++ ++ /* ++ * scsi_fetch_unit_attention_sense() already cleaned the unit attention ++ * in this case. ++ */ ++ if (req->ops == &reqops_unit_attention) { ++ return; ++ } ++ + if (req->dev->unit_attention.key != UNIT_ATTENTION && + req->bus->unit_attention.key != UNIT_ATTENTION) { + return; +diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h +index 6f23a7a73e..1787ddd01e 100644 +--- a/include/hw/scsi/scsi.h ++++ b/include/hw/scsi/scsi.h +@@ -108,6 +108,7 @@ int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num); + /* scsi-bus.c */ + struct SCSIReqOps { + size_t size; ++ void (*init_req)(SCSIRequest *req); + void (*free_req)(SCSIRequest *req); + int32_t (*send_command)(SCSIRequest *req, uint8_t *buf); + void (*read_data)(SCSIRequest *req); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch b/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch deleted file mode 100644 index ca61286..0000000 --- a/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 
0a4f5bcc2a6f8ac31431e971c1dce9e6ab2191c2 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:16 -0500 -Subject: [PATCH 01/12] scsi: protect req->aiocb with AioContext lock - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [1/3] 61727297bd31dfe18220b61f1d265ced0649c60d (stefanha/centos-stream-qemu-kvm) - -If requests are being processed in the IOThread when a SCSIDevice is -unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races -with I/O completion callbacks. Both threads load and store req->aiocb. -This can lead to assert(r->req.aiocb == NULL) failures and undefined -behavior. - -Protect r->req.aiocb with the AioContext lock to prevent the race. - -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/scsi-disk.c | 23 ++++++++++++++++------- - hw/scsi/scsi-generic.c | 11 ++++++----- - 2 files changed, 22 insertions(+), 12 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index e493c28814..5327f93f4c 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -273,9 +273,11 @@ static void scsi_aio_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - if (scsi_disk_req_check_error(r, ret, true)) { - goto done; - } -@@ -357,10 +359,11 @@ static void scsi_dma_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - 
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -393,10 +396,11 @@ static void scsi_read_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -446,10 +450,11 @@ static void scsi_do_read_cb(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -530,10 +535,11 @@ static void scsi_write_complete(void * opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -1737,10 +1743,11 @@ static void scsi_unmap_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb 
!= NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (scsi_disk_req_check_error(r, ret, true)) { - scsi_req_unref(&r->req); - g_free(data); -@@ -1816,9 +1823,11 @@ static void scsi_write_same_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - if (scsi_disk_req_check_error(r, ret, true)) { - goto done; - } -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 92cce20a4d..ac9fa662b4 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret) - SCSIGenericReq *r = (SCSIGenericReq *)opaque; - SCSIDevice *s = r->req.dev; - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); - scsi_command_complete_noio(r, ret); - aio_context_release(blk_get_aio_context(s->conf.blk)); - } -@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret) - SCSIDevice *s = r->req.dev; - int len; - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); - goto done; -@@ -386,11 +387,11 @@ static void scsi_write_complete(void * opaque, int ret) - - trace_scsi_generic_write_complete(ret); - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); - goto done; --- 
-2.39.1 - diff --git a/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch b/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch new file mode 100644 index 0000000..f1de158 --- /dev/null +++ b/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch @@ -0,0 +1,248 @@ +From 00f6e941e75f378c84c773a15efde7dd085d9ce3 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 19:40:14 +0100 +Subject: [PATCH 21/56] spice: move client_migrate_info command to ui/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [20/50] a587bb001b51a1f9fdf2fcfb0978bb931ae443b6 (peterx/qemu-kvm) + +It has nothing to do with migration, except for the "migrate" in the +name of the command. Move it with the rest of the ui commands. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit f9e1ef7482f1ee289b04f4b45702a1701bc8929d) +Signed-off-by: Peter Xu +--- + migration/migration-hmp-cmds.c | 17 ----------------- + migration/migration.c | 30 ------------------------------ + qapi/migration.json | 28 ---------------------------- + qapi/ui.json | 28 ++++++++++++++++++++++++++++ + ui/ui-hmp-cmds.c | 17 +++++++++++++++++ + ui/ui-qmp-cmds.c | 29 +++++++++++++++++++++++++++++ + 6 files changed, 74 insertions(+), 75 deletions(-) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 71da91967a..4e9f00e7dc 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -636,23 +636,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + hmp_handle_error(mon, err); + } + +-void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) +-{ +- Error *err = NULL; +- const char *protocol = qdict_get_str(qdict, "protocol"); +- const char *hostname = qdict_get_str(qdict, "hostname"); +- bool has_port = qdict_haskey(qdict, "port"); +- int port = qdict_get_try_int(qdict, "port", -1); +- bool has_tls_port = qdict_haskey(qdict, "tls-port"); +- int tls_port = qdict_get_try_int(qdict, "tls-port", -1); +- const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); +- +- qmp_client_migrate_info(protocol, hostname, +- has_port, port, has_tls_port, tls_port, +- cert_subject, &err); +- hmp_handle_error(mon, err); +-} +- + void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; +diff --git a/migration/migration.c b/migration/migration.c +index aa96ffdc5b..b745d829a4 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -63,7 +63,6 @@ + #include "sysemu/cpus.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" +-#include "ui/qemu-spice.h" + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +@@ -1018,35 +1017,6 @@ 
MigrationParameters *qmp_query_migrate_parameters(Error **errp) + return params; + } + +-void qmp_client_migrate_info(const char *protocol, const char *hostname, +- bool has_port, int64_t port, +- bool has_tls_port, int64_t tls_port, +- const char *cert_subject, +- Error **errp) +-{ +- if (strcmp(protocol, "spice") == 0) { +- if (!qemu_using_spice(errp)) { +- return; +- } +- +- if (!has_port && !has_tls_port) { +- error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); +- return; +- } +- +- if (qemu_spice.migrate_info(hostname, +- has_port ? port : -1, +- has_tls_port ? tls_port : -1, +- cert_subject)) { +- error_setg(errp, "Could not set up display for migration"); +- return; +- } +- return; +- } +- +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); +-} +- + AnnounceParameters *migrate_announce_params(void) + { + static AnnounceParameters ap; +diff --git a/qapi/migration.json b/qapi/migration.json +index c84fa10e86..2c35b7b9cf 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1203,34 +1203,6 @@ + { 'command': 'query-migrate-parameters', + 'returns': 'MigrationParameters' } + +-## +-# @client_migrate_info: +-# +-# Set migration information for remote display. This makes the server +-# ask the client to automatically reconnect using the new parameters +-# once migration finished successfully. Only implemented for SPICE. 
+-# +-# @protocol: must be "spice" +-# @hostname: migration target hostname +-# @port: spice tcp port for plaintext channels +-# @tls-port: spice tcp port for tls-secured channels +-# @cert-subject: server certificate subject +-# +-# Since: 0.14 +-# +-# Example: +-# +-# -> { "execute": "client_migrate_info", +-# "arguments": { "protocol": "spice", +-# "hostname": "virt42.lab.kraxel.org", +-# "port": 1234 } } +-# <- { "return": {} } +-# +-## +-{ 'command': 'client_migrate_info', +- 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', +- '*tls-port': 'int', '*cert-subject': 'str' } } +- + ## + # @migrate-start-postcopy: + # +diff --git a/qapi/ui.json b/qapi/ui.json +index 98322342f7..7ddd27a932 100644 +--- a/qapi/ui.json ++++ b/qapi/ui.json +@@ -1554,3 +1554,31 @@ + { 'command': 'display-update', + 'data': 'DisplayUpdateOptions', + 'boxed' : true } ++ ++## ++# @client_migrate_info: ++# ++# Set migration information for remote display. This makes the server ++# ask the client to automatically reconnect using the new parameters ++# once migration finished successfully. Only implemented for SPICE. 
++# ++# @protocol: must be "spice" ++# @hostname: migration target hostname ++# @port: spice tcp port for plaintext channels ++# @tls-port: spice tcp port for tls-secured channels ++# @cert-subject: server certificate subject ++# ++# Since: 0.14 ++# ++# Example: ++# ++# -> { "execute": "client_migrate_info", ++# "arguments": { "protocol": "spice", ++# "hostname": "virt42.lab.kraxel.org", ++# "port": 1234 } } ++# <- { "return": {} } ++# ++## ++{ 'command': 'client_migrate_info', ++ 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', ++ '*tls-port': 'int', '*cert-subject': 'str' } } +diff --git a/ui/ui-hmp-cmds.c b/ui/ui-hmp-cmds.c +index 5c456ecc02..c671389473 100644 +--- a/ui/ui-hmp-cmds.c ++++ b/ui/ui-hmp-cmds.c +@@ -458,3 +458,20 @@ hmp_screendump(Monitor *mon, const QDict *qdict) + end: + hmp_handle_error(mon, err); + } ++ ++void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) ++{ ++ Error *err = NULL; ++ const char *protocol = qdict_get_str(qdict, "protocol"); ++ const char *hostname = qdict_get_str(qdict, "hostname"); ++ bool has_port = qdict_haskey(qdict, "port"); ++ int port = qdict_get_try_int(qdict, "port", -1); ++ bool has_tls_port = qdict_haskey(qdict, "tls-port"); ++ int tls_port = qdict_get_try_int(qdict, "tls-port", -1); ++ const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); ++ ++ qmp_client_migrate_info(protocol, hostname, ++ has_port, port, has_tls_port, tls_port, ++ cert_subject, &err); ++ hmp_handle_error(mon, err); ++} +diff --git a/ui/ui-qmp-cmds.c b/ui/ui-qmp-cmds.c +index dbc4afcd73..a37a7024f3 100644 +--- a/ui/ui-qmp-cmds.c ++++ b/ui/ui-qmp-cmds.c +@@ -175,3 +175,32 @@ void qmp_display_update(DisplayUpdateOptions *arg, Error **errp) + abort(); + } + } ++ ++void qmp_client_migrate_info(const char *protocol, const char *hostname, ++ bool has_port, int64_t port, ++ bool has_tls_port, int64_t tls_port, ++ const char *cert_subject, ++ Error **errp) ++{ ++ if (strcmp(protocol, "spice") == 0) { ++ if 
(!qemu_using_spice(errp)) { ++ return; ++ } ++ ++ if (!has_port && !has_tls_port) { ++ error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); ++ return; ++ } ++ ++ if (qemu_spice.migrate_info(hostname, ++ has_port ? port : -1, ++ has_tls_port ? tls_port : -1, ++ cert_subject)) { ++ error_setg(errp, "Could not set up display for migration"); ++ return; ++ } ++ return; ++ } ++ ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch b/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch deleted file mode 100644 index a8e3957..0000000 --- a/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:04 +0100 -Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s) - -The subtree drain was introduced in commit b1e1af394d9 as a way to avoid -graph changes between finding the base node and changing the block graph -as necessary on completion of the image streaming job. - -The block graph could change between these two points because -bdrv_set_backing_hd() first drains the parent node, which involved -polling and can do anything. - -Subtree draining was an imperfect way to make this less likely (because -with it, fewer callbacks are called during this window). Everyone agreed -that it's not really the right solution, and it was only committed as a -stopgap solution. 
- -This replaces the subtree drain with a solution that simply drains the -parent node before we try to find the base node, and then call a version -of bdrv_set_backing_hd() that doesn't drain, but just asserts that the -parent node is already drained. - -This way, any graph changes caused by draining happen before we start -looking at the graph and things stay consistent between finding the base -node and changing the graph. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-10-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6) -Signed-off-by: Stefano Garzarella ---- - block.c | 17 ++++++++++++++--- - block/stream.c | 26 ++++++++++++++++---------- - include/block/block-global-state.h | 3 +++ - 3 files changed, 33 insertions(+), 13 deletions(-) - -diff --git a/block.c b/block.c -index b3449a312e..5330e89903 100644 ---- a/block.c -+++ b/block.c -@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, - return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); - } - --int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, -- Error **errp) -+int bdrv_set_backing_hd_drained(BlockDriverState *bs, -+ BlockDriverState *backing_hd, -+ Error **errp) - { - int ret; - Transaction *tran = tran_new(); - - GLOBAL_STATE_CODE(); -- bdrv_drained_begin(bs); -+ assert(bs->quiesce_counter > 0); - - ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); - if (ret < 0) { -@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - ret = bdrv_refresh_perms(bs, errp); - out: - tran_finalize(tran, ret); -+ return ret; -+} - -+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, -+ Error **errp) -+{ -+ int ret; -+ GLOBAL_STATE_CODE(); -+ -+ bdrv_drained_begin(bs); -+ ret = bdrv_set_backing_hd_drained(bs, backing_hd, 
errp); - bdrv_drained_end(bs); - - return ret; -diff --git a/block/stream.c b/block/stream.c -index 694709bd25..8744ad103f 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -64,13 +64,16 @@ static int stream_prepare(Job *job) - bdrv_cor_filter_drop(s->cor_filter_bs); - s->cor_filter_bs = NULL; - -- bdrv_subtree_drained_begin(s->above_base); -+ /* -+ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain -+ * already here and use bdrv_set_backing_hd_drained() instead because -+ * the polling during drained_begin() might change the graph, and if we do -+ * this only later, we may end up working with the wrong base node (or it -+ * might even have gone away by the time we want to use it). -+ */ -+ bdrv_drained_begin(unfiltered_bs); - - base = bdrv_filter_or_cow_bs(s->above_base); -- if (base) { -- bdrv_ref(base); -- } -- - unfiltered_base = bdrv_skip_filters(base); - - if (bdrv_cow_child(unfiltered_bs)) { -@@ -82,7 +85,13 @@ static int stream_prepare(Job *job) - } - } - -- bdrv_set_backing_hd(unfiltered_bs, base, &local_err); -+ bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); -+ -+ /* -+ * This call will do I/O, so the graph can change again from here on. -+ * We have already completed the graph change, so we are not in danger -+ * of operating on the wrong node any more if this happens. 
-+ */ - ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); - if (local_err) { - error_report_err(local_err); -@@ -92,10 +101,7 @@ static int stream_prepare(Job *job) - } - - out: -- if (base) { -- bdrv_unref(base); -- } -- bdrv_subtree_drained_end(s->above_base); -+ bdrv_drained_end(unfiltered_bs); - return ret; - } - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index c7bd4a2088..00e0cf8aea 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, - BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); - int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp); -+int bdrv_set_backing_hd_drained(BlockDriverState *bs, -+ BlockDriverState *backing_hd, -+ Error **errp); - int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - const char *bdref_key, Error **errp); - BlockDriverState *bdrv_open(const char *filename, const char *reference, --- -2.31.1 - diff --git a/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch b/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch new file mode 100644 index 0000000..43c239a --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch @@ -0,0 +1,203 @@ +From 03011d00cfb5862edb7394a9b79b269198af5c89 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:48:34 -0400 +Subject: [PATCH 7/7] target/i386: Add EPYC-Genoa model to support Zen 4 + processor series + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/7] 158091c691169a5d30c7c8005371ee7a0d9fc4ce (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit 166b1741884dd4fd7090b753cd7333868457a29b +Author: Babu Moger 
+Date: Thu May 4 15:53:12 2023 -0500 + + target/i386: Add EPYC-Genoa model to support Zen 4 processor series + + Adds the support for AMD EPYC Genoa generation processors. The model + display for the new processor will be EPYC-Genoa. + + Adds the following new feature bits on top of the feature bits from + the previous generation EPYC models. + + avx512f : AVX-512 Foundation instruction + avx512dq : AVX-512 Doubleword & Quadword Instruction + avx512ifma : AVX-512 Integer Fused Multiply Add instruction + avx512cd : AVX-512 Conflict Detection instruction + avx512bw : AVX-512 Byte and Word Instructions + avx512vl : AVX-512 Vector Length Extension Instructions + avx512vbmi : AVX-512 Vector Byte Manipulation Instruction + avx512_vbmi2 : AVX-512 Additional Vector Byte Manipulation Instruction + gfni : AVX-512 Galois Field New Instructions + avx512_vnni : AVX-512 Vector Neural Network Instructions + avx512_bitalg : AVX-512 Bit Algorithms, add bit algorithms Instructions + avx512_vpopcntdq: AVX-512 AVX-512 Vector Population Count Doubleword and + Quadword Instructions + avx512_bf16 : AVX-512 BFLOAT16 instructions + la57 : 57-bit virtual address support (5-level Page Tables) + vnmi : Virtual NMI (VNMI) allows the hypervisor to inject the NMI + into the guest without using Event Injection mechanism + meaning not required to track the guest NMI and intercepting + the IRET. + auto-ibrs : The AMD Zen4 core supports a new feature called Automatic IBRS. + It is a "set-and-forget" feature that means that, unlike e.g., + s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation + resources automatically across CPL transitions. 
+ + Signed-off-by: Babu Moger + Message-Id: <20230504205313.225073-8-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 122 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f1baefe775..b27db050a2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1973,6 +1973,56 @@ static const CPUCaches epyc_milan_v2_cache_info = { + }, + }; + ++static const CPUCaches epyc_genoa_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 1 * MiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 2048, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 32 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 32768, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = false, ++ }, ++}; ++ + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * +@@ -4493,6 +4543,78 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .name = "EPYC-Genoa", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_AMD, ++ .family = 25, ++ .model = 17, ++ .stepping = 0, ++ .features[FEAT_1_EDX] = ++ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | ++ CPUID_PSE36 | 
CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | ++ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | ++ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | ++ CPUID_VME | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | ++ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | ++ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | ++ CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | ++ CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | ++ CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | ++ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | ++ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | ++ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | ++ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | ++ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | ++ CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, ++ .features[FEAT_8000_0021_EAX] = ++ CPUID_8000_0021_EAX_No_NESTED_DATA_BP | ++ CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | ++ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | ++ CPUID_8000_0021_EAX_AUTO_IBRS, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | ++ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | ++ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | ++ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, ++ 
.features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | ++ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | ++ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | ++ CPUID_7_0_ECX_RDPID, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX512_BF16, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_SVM] = ++ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | ++ CPUID_SVM_SVME_ADDR_CHK, ++ .xlevel = 0x80000022, ++ .model_id = "AMD EPYC-Genoa Processor", ++ .cache_info = &epyc_genoa_cache_info, ++ }, + }; + + /* +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch b/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch new file mode 100644 index 0000000..5e8f79b --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch @@ -0,0 +1,105 @@ +From 95c5cee20741b055dea9ac3ad3176bbaa1eaf705 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:46:25 -0400 +Subject: [PATCH 6/7] target/i386: Add VNMI and automatic IBRS feature bits +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/7] 24c0fb08973aa2615817f67576550ce2efadb75c (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit 62a798d4bc2c3e767d94670776c77a7df274d7c5 +Author: Babu Moger +Date: Thu May 4 15:53:11 2023 -0500 + + target/i386: Add VNMI and automatic IBRS feature bits + + Add the following feature bits. 
+ + vnmi: Virtual NMI (VNMI) allows the hypervisor to inject the NMI into the + guest without using Event Injection mechanism meaning not required to + track the guest NMI and intercepting the IRET. + The presence of this feature is indicated via the CPUID function + 0x8000000A_EDX[25]. + + automatic-ibrs : + The AMD Zen4 core supports a new feature called Automatic IBRS. + It is a "set-and-forget" feature that means that, unlike e.g., + s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation + resources automatically across CPL transitions. + The presence of this feature is indicated via the CPUID function + 0x80000021_EAX[8]. + + The documentation for the features is available in the links below. + a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, + Revision B1 Processors + b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision + 40332 4.05 Date October 2022 + + Signed-off-by: Santosh Shukla + Signed-off-by: Kim Phillips + Signed-off-by: Babu Moger + Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip + Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf + Message-Id: <20230504205313.225073-7-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 3 +++ + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index bbddc682df..f1baefe775 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -806,7 +806,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "pfthreshold", "avic", NULL, "v-vmsave-vmload", + "vgif", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, "vnmi", NULL, NULL, + "svme-addr-chk", NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, +@@ -925,7 +925,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .feat_names = { + "no-nested-data-bp", NULL, "lfence-always-serializing", 
NULL, + NULL, NULL, "null-sel-clr-base", NULL, +- NULL, NULL, NULL, NULL, ++ "auto-ibrs", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index c37abf62ae..f7d225e4f1 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -773,6 +773,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_SVM_AVIC (1U << 13) + #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) + #define CPUID_SVM_VGIF (1U << 16) ++#define CPUID_SVM_VNMI (1U << 25) + #define CPUID_SVM_SVME_ADDR_CHK (1U << 28) + + /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ +@@ -948,6 +949,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) + /* Null Selector Clears Base */ + #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) ++/* Automatic IBRS */ ++#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) + + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch b/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch new file mode 100644 index 0000000..772bbbd --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch @@ -0,0 +1,94 @@ +From 2d7fb99c02a7666f1d8fe70a4749f0b7771a68ed Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:29:55 -0400 +Subject: [PATCH 3/7] target/i386: Add a couple of feature bits in + 8000_0008_EBX + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/7] b11020b249d4ecc2e3e1ddf4fdc4b52c42ec2642 (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit bb039a230e6a7920d71d21fa9afee2653a678c48 +Author: Babu Moger +Date: Thu May 4 15:53:08 2023 -0500 + + 
target/i386: Add a couple of feature bits in 8000_0008_EBX + + Add the following feature bits. + + amd-psfd : Predictive Store Forwarding Disable: + PSF is a hardware-based micro-architectural optimization + designed to improve the performance of code execution by + predicting address dependencies between loads and stores. + While SSBD (Speculative Store Bypass Disable) disables both + PSF and speculative store bypass, PSFD only disables PSF. + PSFD may be desirable for the software which is concerned + with the speculative behavior of PSF but desires a smaller + performance impact than setting SSBD. + Depends on the following kernel commit: + b73a54321ad8 ("KVM: x86: Expose Predictive Store Forwarding Disable") + + stibp-always-on : + Single Thread Indirect Branch Prediction mode has enhanced + performance and may be left always on. + + The documentation for the features are available in the links below. + a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, + Revision B1 Processors + b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING + + Signed-off-by: Babu Moger + Acked-by: Michael S. 
Tsirkin + Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf + Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip + Message-Id: <20230504205313.225073-4-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 4 ++++ + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 8aa7eb611c..c8f88aefc7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -911,10 +911,10 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, "wbnoinvd", NULL, NULL, + "ibpb", NULL, "ibrs", "amd-stibp", +- NULL, NULL, NULL, NULL, ++ NULL, "stibp-always-on", NULL, NULL, + NULL, NULL, NULL, NULL, + "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, +- NULL, NULL, NULL, NULL, ++ "amd-psfd", NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, + .tcg_features = 0, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index c28b9df217..81d2200543 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -934,8 +934,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_8000_0008_EBX_IBRS (1U << 14) + /* Single Thread Indirect Branch Predictors */ + #define CPUID_8000_0008_EBX_STIBP (1U << 15) ++/* STIBP mode has enhanced performance and may be left always on */ ++#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) + /* Speculative Store Bypass Disable */ + #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) ++/* Predictive Store Forwarding Disable */ ++#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) + + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch b/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch new file mode 100644 index 0000000..c714e49 --- /dev/null +++ 
b/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch @@ -0,0 +1,126 @@ +From 2a2f74c53258ef67034307b59afe2f4c679afaa2 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:32:00 -0400 +Subject: [PATCH 4/7] target/i386: Add feature bits for CPUID_Fn80000021_EAX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/7] 133044a7245226308406a684a875e1f96a394516 (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit b70eec312b185197d639bff689007727e596afd1 +Author: Babu Moger +Date: Thu May 4 15:53:09 2023 -0500 + + target/i386: Add feature bits for CPUID_Fn80000021_EAX + + Add the following feature bits. + no-nested-data-bp : Processor ignores nested data breakpoints. + lfence-always-serializing : LFENCE instruction is always serializing. + null-sel-clr-base : Null Selector Clears Base. When this bit is + set, a null segment load clears the segment base. + + The documentation for the features are available in the links below. + a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, + Revision B1 Processors + b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision + 40332 4.05 Date October 2022 + + Signed-off-by: Babu Moger + Acked-by: Michael S. 
Tsirkin + Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip + Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf + Message-Id: <20230504205313.225073-5-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 24 ++++++++++++++++++++++++ + target/i386/cpu.h | 8 ++++++++ + 2 files changed, 32 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index c8f88aefc7..7ddebbaa3c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -920,6 +920,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .tcg_features = 0, + .unmigratable_flags = 0, + }, ++ [FEAT_8000_0021_EAX] = { ++ .type = CPUID_FEATURE_WORD, ++ .feat_names = { ++ "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, ++ NULL, NULL, "null-sel-clr-base", NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, ++ .tcg_features = 0, ++ .unmigratable_flags = 0, ++ }, + [FEAT_XSAVE] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +@@ -6156,6 +6172,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; ++ case 0x80000021: ++ *eax = env->features[FEAT_8000_0021_EAX]; ++ *ebx = *ecx = *edx = 0; ++ break; + default: + /* reserved values: zero */ + *eax = 0; +@@ -6585,6 +6605,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); + } + ++ if (env->features[FEAT_8000_0021_EAX]) { ++ x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); ++ } ++ + /* SGX requires CPUID[0x12] for EPC enumeration */ + if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); +diff --git a/target/i386/cpu.h 
b/target/i386/cpu.h +index 81d2200543..c37abf62ae 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -600,6 +600,7 @@ typedef enum FeatureWord { + FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ + FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ + FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ ++ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ + FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ + FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ + FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ +@@ -941,6 +942,13 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + /* Predictive Store Forwarding Disable */ + #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) + ++/* Processor ignores nested data breakpoints */ ++#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) ++/* LFENCE is always serializing */ ++#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) ++/* Null Selector Clears Base */ ++#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) ++ + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) + #define CPUID_XSAVE_XGETBV1 (1U << 2) +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch b/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch new file mode 100644 index 0000000..9bb4bf9 --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch @@ -0,0 +1,152 @@ +From a8180665019d537ee9775614627bf9eb8bd4770e Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:35:33 -0400 +Subject: [PATCH 5/7] target/i386: Add missing feature bits in EPYC-Milan model +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/7] 8f77315c8d7010564423df3e3c594c90fd5f9c00 (bdas1/qemu-kvm) + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit 27f03be6f59d04bd5673ba1e1628b2b490f9a9ff +Author: Babu Moger +Date: Thu May 4 15:53:10 2023 -0500 + + target/i386: Add missing feature bits in EPYC-Milan model + + Add the following feature bits for EPYC-Milan model and bump the version. + vaes : Vector VAES(ENC|DEC), VAES(ENC|DEC)LAST instruction support + vpclmulqdq : Vector VPCLMULQDQ instruction support + stibp-always-on : Single Thread Indirect Branch Prediction Mode has enhanced + performance and may be left Always on + amd-psfd : Predictive Store Forward Disable + no-nested-data-bp : Processor ignores nested data breakpoints + lfence-always-serializing : LFENCE instruction is always serializing + null-sel-clr-base : Null Selector Clears Base. When this bit is + set, a null segment load clears the segment base + + These new features will be added in EPYC-Milan-v2. The "-cpu help" output + after the change will be. + + x86 EPYC-Milan (alias configured by machine type) + x86 EPYC-Milan-v1 AMD EPYC-Milan Processor + x86 EPYC-Milan-v2 AMD EPYC-Milan Processor + + The documentation for the features are available in the links below. + a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, + Revision B1 Processors + b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING + c. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision + 40332 4.05 Date October 2022 + + Signed-off-by: Babu Moger + Acked-by: Michael S. 
Tsirkin + Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip + Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf + Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf + Message-Id: <20230504205313.225073-6-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 70 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 7ddebbaa3c..bbddc682df 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1923,6 +1923,56 @@ static const CPUCaches epyc_milan_cache_info = { + }, + }; + ++static const CPUCaches epyc_milan_v2_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 32 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 32768, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = false, ++ }, ++}; ++ + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * +@@ -4422,6 +4472,26 @@ static const X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x8000001E, + .model_id = "AMD EPYC-Milan 
Processor", + .cache_info = &epyc_milan_cache_info, ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { ++ .version = 2, ++ .props = (PropValue[]) { ++ { "model-id", ++ "AMD EPYC-Milan-v2 Processor" }, ++ { "vaes", "on" }, ++ { "vpclmulqdq", "on" }, ++ { "stibp-always-on", "on" }, ++ { "amd-psfd", "on" }, ++ { "no-nested-data-bp", "on" }, ++ { "lfence-always-serializing", "on" }, ++ { "null-sel-clr-base", "on" }, ++ { /* end of list */ } ++ }, ++ .cache_info = &epyc_milan_v2_cache_info ++ }, ++ { /* end of list */ } ++ } + }, + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch b/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch new file mode 100644 index 0000000..40c289a --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch @@ -0,0 +1,192 @@ +From 92f0b5d0c7a841a21cabbc6efc1d7baf0e5a3e0f Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:26:12 -0400 +Subject: [PATCH 2/7] target/i386: Add new EPYC CPU versions with updated + cache_info + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/7] 71a2fd907636733f86729bc9328600f6f9306eaf (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit d7c72735f618a7ee27ee109d8b1468193734606a +Author: Michael Roth +Date: Thu May 4 15:53:07 2023 -0500 + + target/i386: Add new EPYC CPU versions with updated cache_info + + Introduce new EPYC cpu versions: EPYC-v4 and EPYC-Rome-v3. + The only difference vs. older models is an updated cache_info with + the 'complex_indexing' bit unset, since this bit is not currently + defined for AMD and may cause problems should it be used for + something else in the future. Setting this bit will also cause + CPUID validation failures when running SEV-SNP guests. 
+ + Signed-off-by: Michael Roth + Signed-off-by: Babu Moger + Acked-by: Michael S. Tsirkin + Message-Id: <20230504205313.225073-3-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 118 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 3558c92ed0..8aa7eb611c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1707,6 +1707,56 @@ static const CPUCaches epyc_cache_info = { + }, + }; + ++static CPUCaches epyc_v4_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 64 * KiB, ++ .line_size = 64, ++ .associativity = 4, ++ .partitions = 1, ++ .sets = 256, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 8 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 8192, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = false, ++ }, ++}; ++ + static const CPUCaches epyc_rome_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, +@@ -1757,6 +1807,56 @@ static const CPUCaches epyc_rome_cache_info = { + }, + }; + ++static const CPUCaches epyc_rome_v3_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ 
.lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 16 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 16384, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = false, ++ }, ++}; ++ + static const CPUCaches epyc_milan_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, +@@ -4112,6 +4212,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .version = 4, ++ .props = (PropValue[]) { ++ { "model-id", ++ "AMD EPYC-v4 Processor" }, ++ { /* end of list */ } ++ }, ++ .cache_info = &epyc_v4_cache_info ++ }, + { /* end of list */ } + } + }, +@@ -4231,6 +4340,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .version = 3, ++ .props = (PropValue[]) { ++ { "model-id", ++ "AMD EPYC-Rome-v3 Processor" }, ++ { /* end of list */ } ++ }, ++ .cache_info = &epyc_rome_v3_cache_info ++ }, + { /* end of list */ } + } + }, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch b/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch deleted file mode 100644 index 52e73e7..0000000 --- a/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch +++ /dev/null @@ -1,144 +0,0 @@ -From e419493e6ec188461aa6f06c1b1cdc8a698859df Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 
14 Jan 2023 15:21:03 -1000 -Subject: [PATCH 6/8] target/i386: Fix 32-bit AD[CO]X insns in 64-bit mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [6/7] 0fa4d3858319d4f877a5b3f31776121a72e2c57a (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -Failure to truncate the inputs results in garbage for the carry-out. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1373 -Signed-off-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20230115012103.3131796-1-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 6fbef9426bac7184b5d5887589d8386e732865eb) ---- - target/i386/tcg/emit.c.inc | 2 + - tests/tcg/x86_64/Makefile.target | 3 ++ - tests/tcg/x86_64/adox.c | 69 ++++++++++++++++++++++++++++++++ - 3 files changed, 74 insertions(+) - create mode 100644 tests/tcg/x86_64/adox.c - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 0d7c6e80ae..e61ae9a2e9 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1037,6 +1037,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - #ifdef TARGET_X86_64 - case MO_32: - /* If TL is 64-bit just do everything in 64-bit arithmetic. 
*/ -+ tcg_gen_ext32u_tl(s->T0, s->T0); -+ tcg_gen_ext32u_tl(s->T1, s->T1); - tcg_gen_add_i64(s->T0, s->T0, s->T1); - tcg_gen_add_i64(s->T0, s->T0, carry_in); - tcg_gen_shri_i64(carry_out, s->T0, 32); -diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target -index 4eac78293f..e64aab1b81 100644 ---- a/tests/tcg/x86_64/Makefile.target -+++ b/tests/tcg/x86_64/Makefile.target -@@ -12,11 +12,14 @@ ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET)) - X86_64_TESTS += vsyscall - X86_64_TESTS += noexec - X86_64_TESTS += cmpxchg -+X86_64_TESTS += adox - TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 - else - TESTS=$(MULTIARCH_TESTS) - endif - -+adox: CFLAGS=-O2 -+ - run-test-i386-ssse3: QEMU_OPTS += -cpu max - run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max - -diff --git a/tests/tcg/x86_64/adox.c b/tests/tcg/x86_64/adox.c -new file mode 100644 -index 0000000000..36be644c8b ---- /dev/null -+++ b/tests/tcg/x86_64/adox.c -@@ -0,0 +1,69 @@ -+/* See if ADOX give expected results */ -+ -+#include -+#include -+#include -+ -+static uint64_t adoxq(bool *c_out, uint64_t a, uint64_t b, bool c) -+{ -+ asm ("addl $0x7fffffff, %k1\n\t" -+ "adoxq %2, %0\n\t" -+ "seto %b1" -+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); -+ *c_out = c; -+ return a; -+} -+ -+static uint64_t adoxl(bool *c_out, uint64_t a, uint64_t b, bool c) -+{ -+ asm ("addl $0x7fffffff, %k1\n\t" -+ "adoxl %k2, %k0\n\t" -+ "seto %b1" -+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); -+ *c_out = c; -+ return a; -+} -+ -+int main() -+{ -+ uint64_t r; -+ bool c; -+ -+ r = adoxq(&c, 0, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0x100000000, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxq(&c, 0, 0, 1); -+ assert(r == 1); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0, 0, 1); -+ assert(r == 1); -+ assert(c == 0); -+ -+ r = adoxq(&c, -1, -1, 0); -+ assert(r == -2); -+ assert(c == 1); -+ -+ r = adoxl(&c, -1, 
-1, 0); -+ assert(r == 0xfffffffe); -+ assert(c == 1); -+ -+ r = adoxq(&c, -1, -1, 1); -+ assert(r == -1); -+ assert(c == 1); -+ -+ r = adoxl(&c, -1, -1, 1); -+ assert(r == 0xffffffff); -+ assert(c == 1); -+ -+ return 0; -+} --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch b/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch deleted file mode 100644 index 0c28c7e..0000000 --- a/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch +++ /dev/null @@ -1,110 +0,0 @@ -From a019c203f0148e5fbb20e102a17453806f5296b6 Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:05:42 -1000 -Subject: [PATCH 3/8] target/i386: Fix BEXTR instruction - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [3/7] bd1e3b26c72d7152b44be2d34308fd40dc106424 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -There were two problems here: not limiting the input to operand bits, -and not correctly handling large extraction length. 
- -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1372 -Signed-off-by: Richard Henderson -Message-Id: <20230114230542.3116013-3-richard.henderson@linaro.org> -Cc: qemu-stable@nongnu.org -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Signed-off-by: Paolo Bonzini -(cherry picked from commit b14c0098975264ed03144f145bca0179a6763a07) ---- - target/i386/tcg/emit.c.inc | 22 +++++++++++----------- - tests/tcg/i386/test-i386-bmi2.c | 12 ++++++++++++ - 2 files changed, 23 insertions(+), 11 deletions(-) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 7037ff91c6..99f6ba6e19 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1078,30 +1078,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; -- TCGv bound, zero; -+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ TCGv zero = tcg_constant_tl(0); -+ TCGv mone = tcg_constant_tl(-1); - - /* - * Extract START, and shift the operand. - * Shifts larger than operand size get zeros. - */ - tcg_gen_ext8u_tl(s->A0, s->T1); -+ if (TARGET_LONG_BITS == 64 && ot == MO_32) { -+ tcg_gen_ext32u_tl(s->T0, s->T0); -+ } - tcg_gen_shr_tl(s->T0, s->T0, s->A0); - -- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -- zero = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); - - /* -- * Extract the LEN into a mask. Lengths larger than -- * operand size get all ones. -+ * Extract the LEN into an inverse mask. Lengths larger than -+ * operand size get all zeros, length 0 gets all ones. 
- */ - tcg_gen_extract_tl(s->A0, s->T1, 8, 8); -- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound); -- -- tcg_gen_movi_tl(s->T1, 1); -- tcg_gen_shl_tl(s->T1, s->T1, s->A0); -- tcg_gen_subi_tl(s->T1, s->T1, 1); -- tcg_gen_and_tl(s->T0, s->T0, s->T1); -+ tcg_gen_shl_tl(s->T1, mone, s->A0); -+ tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); -+ tcg_gen_andc_tl(s->T0, s->T0, s->T1); - - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 3c3ef85513..982d4abda4 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) { - result = bextrq(mask, 0x10f8); - assert(result == 0); - -+ result = bextrq(0xfedcba9876543210ull, 0x7f00); -+ assert(result == 0xfedcba9876543210ull); -+ - result = blsiq(0x30); - assert(result == 0x10); - -@@ -164,6 +167,15 @@ int main(int argc, char *argv[]) { - result = bextrl(mask, 0x1038); - assert(result == 0); - -+ result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018); -+ assert(result == 0x5a); -+ -+ result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00); -+ assert(result == 0x76543210u); -+ -+ result = bextrl(-1, 0); -+ assert(result == 0); -+ - result = blsil(0xffff); - assert(result == 1); - --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch b/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch deleted file mode 100644 index bcf79f4..0000000 --- a/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch +++ /dev/null @@ -1,77 +0,0 @@ -From d49e5d193dfccf6f5cfa98ccce5bd491478d563d Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:32:06 -1000 -Subject: [PATCH 7/8] target/i386: Fix BZHI instruction - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov 
-RH-Acked-by: Bandan Das -RH-Commit: [7/7] ad6b343c09c0304ac32cc68670c49d1fc12d8cf8 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -We did not correctly handle N >= operand size. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1374 -Signed-off-by: Richard Henderson -Message-Id: <20230114233206.3118472-1-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9ad2ba6e8e7fc195d0dd0b76ab38bd2fceb1bdd4) ---- - target/i386/tcg/emit.c.inc | 14 +++++++------- - tests/tcg/i386/test-i386-bmi2.c | 3 +++ - 2 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index e61ae9a2e9..0d01e13002 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1147,20 +1147,20 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; -- TCGv bound; -+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ TCGv zero = tcg_constant_tl(0); -+ TCGv mone = tcg_constant_tl(-1); - -- tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); -- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ tcg_gen_ext8u_tl(s->T1, s->T1); - - /* - * Note that since we're using BMILG (in order to get O - * cleared) we need to store the inverse into C. 
- */ -- tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound); -- tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1); -+ tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound); - -- tcg_gen_movi_tl(s->A0, -1); -- tcg_gen_shl_tl(s->A0, s->A0, s->T1); -+ tcg_gen_shl_tl(s->A0, mone, s->T1); -+ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); - tcg_gen_andc_tl(s->T0, s->T0, s->A0); - - gen_op_update1_cc(s); -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 982d4abda4..0244df7987 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -123,6 +123,9 @@ int main(int argc, char *argv[]) { - result = bzhiq(mask, 0x1f); - assert(result == (mask & ~(-1 << 30))); - -+ result = bzhiq(mask, 0x40); -+ assert(result == mask); -+ - result = rorxq(0x2132435465768798, 8); - assert(result == 0x9821324354657687); - --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch b/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch deleted file mode 100644 index 7f3051f..0000000 --- a/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch +++ /dev/null @@ -1,60 +0,0 @@ -From cb2b591e1677db2837810eaedac534a7ff3a7b1c Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 08:06:01 -1000 -Subject: [PATCH 4/8] target/i386: Fix C flag for BLSI, BLSMSK, BLSR - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [4/7] 173e23c492c830da6c5a4be0cfc20a69ac655b59 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -We forgot to set cc_src, which is used for computing C. 
- -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1370 -Signed-off-by: Richard Henderson -Message-Id: <20230114180601.2993644-1-richard.henderson@linaro.org> -Cc: qemu-stable@nongnu.org -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Signed-off-by: Paolo Bonzini -(cherry picked from commit 99282098dc74c2055bde5652bde6cf0067d0c370) ---- - target/i386/tcg/emit.c.inc | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 99f6ba6e19..4d7702c106 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1111,6 +1111,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_neg_tl(s->T1, s->T0); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -@@ -1121,6 +1122,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_xor_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -@@ -1131,6 +1133,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch new file mode 100644 index 0000000..2b1cbc9 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch @@ -0,0 +1,71 @@ +From 0d056d6da9e4147d5965bf3507f6d6d6a413924d Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Wed, 24 May 2023 06:52:43 -0400 +Subject: [PATCH 2/5] target/i386: 
add support for FB_CLEAR feature + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature +RH-Bugzilla: 2216201 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] 5f191964ba25754107a06ef907f4ac614280aaa1 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216201 + +commit 22e1094ca82d5518c1b69aff3e87c550776ae1eb +Author: Emanuele Giuseppe Esposito +Date: Wed Feb 1 08:57:59 2023 -0500 + + target/i386: add support for FB_CLEAR feature + + As reported by the Intel's doc: + "FB_CLEAR: The processor will overwrite fill buffer values as part of + MD_CLEAR operations with the VERW instruction. + On these processors, L1D_FLUSH does not overwrite fill buffer values." + + If this cpu feature is present in host, allow QEMU to choose whether to + show it to the guest too. + One disadvantage of not exposing it is that the guest will report + a non existing vulnerability in + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + because the mitigation is present only when the cpu has + (FLUSH_L1D and MD_CLEAR) or FB_CLEAR + features enabled. 
+ + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20230201135759.555607-3-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index caf6338cc0..839706b430 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1012,7 +1012,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", + "taa-no", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 74fa649b60..c28b9df217 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -989,6 +989,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) + #define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) + #define MSR_ARCH_CAP_TAA_NO (1U << 8) ++#define MSR_ARCH_CAP_FB_CLEAR (1U << 17) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch new file mode 100644 index 0000000..39f2542 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch @@ -0,0 +1,70 @@ +From 14eae569030805680570d93412100ad26242c7e6 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Wed, 24 May 2023 06:52:34 -0400 +Subject: [PATCH 1/5] target/i386: add support for FLUSH_L1D feature + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature +RH-Bugzilla: 2216201 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] e296c75c5cd7e1d16d3c70483d52aeba9f9eb2cd (eesposit/qemu-kvm) + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2216201 + +commit 0e7e3bf1a552c178924867fa7c2f30ccc8a179e0 +Author: Emanuele Giuseppe Esposito +Date: Wed Feb 1 08:57:58 2023 -0500 + + target/i386: add support for FLUSH_L1D feature + + As reported by Intel's doc: + "L1D_FLUSH: Writeback and invalidate the L1 data cache" + + If this cpu feature is present in host, allow QEMU to choose whether to + show it to the guest too. + One disadvantage of not exposing it is that the guest will report + a non existing vulnerability in + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + because the mitigation is present only when the cpu has + (FLUSH_L1D and MD_CLEAR) or FB_CLEAR + features enabled. + + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20230201135759.555607-2-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0ef2bf1b93..caf6338cc0 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -860,7 +860,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "tsx-ldtrk", NULL, NULL /* pconfig */, "arch-lbr", + NULL, NULL, "amx-bf16", "avx512-fp16", + "amx-tile", "amx-int8", "spec-ctrl", "stibp", +- NULL, "arch-capabilities", "core-capability", "ssbd", ++ "flush-l1d", "arch-capabilities", "core-capability", "ssbd", + }, + .cpuid = { + .eax = 7, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index d243e290d3..74fa649b60 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -896,6 +896,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) + /* Single Thread Indirect Branch Predictors */ + #define CPUID_7_0_EDX_STIBP (1U << 27) ++/* Flush L1D cache */ ++#define CPUID_7_0_EDX_FLUSH_L1D (1U << 28) + /* Arch Capabilities */ + #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) 
+ /* Core Capability */ +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch b/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch new file mode 100644 index 0000000..2c81c72 --- /dev/null +++ b/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch @@ -0,0 +1,116 @@ +From 457e74c076e0fe7b64631dfd4369d167f0762c9a Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 9 Aug 2023 12:22:41 -0400 +Subject: [PATCH 1/7] target/i386: allow versioned CPUs to specify new + cache_info + +RH-Author: Bandan Das +RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu +RH-Bugzilla: 2094913 +RH-Acked-by: Wei Huang +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/7] 6070e07a4bb070d1c15a811b2bd3195929c18d61 (bdas1/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 + +commit cca0a000d06f897411a8af4402e5d0522bbe450b +Author: Michael Roth +Date: Thu May 4 15:53:06 2023 -0500 + + target/i386: allow versioned CPUs to specify new cache_info + + New EPYC CPUs versions require small changes to their cache_info's. + Because current QEMU x86 CPU definition does not support versioned + cach_info, we would have to declare a new CPU type for each such case. + To avoid the dup work, add "cache_info" in X86CPUVersionDefinition", + to allow new cache_info pointers to be specified for a new CPU version. + + Co-developed-by: Wei Huang + Signed-off-by: Wei Huang + Signed-off-by: Michael Roth + Signed-off-by: Babu Moger + Acked-by: Michael S. 
Tsirkin + Message-Id: <20230504205313.225073-2-babu.moger@amd.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Bandan Das +--- + target/i386/cpu.c | 35 ++++++++++++++++++++++++++++++++--- + 1 file changed, 32 insertions(+), 3 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 4ac3046313..3558c92ed0 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1598,6 +1598,7 @@ typedef struct X86CPUVersionDefinition { + const char *alias; + const char *note; + PropValue *props; ++ const CPUCaches *const cache_info; + } X86CPUVersionDefinition; + + /* Base definition for a CPU model */ +@@ -5213,6 +5214,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) + assert(vdef->version == version); + } + ++static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, ++ X86CPUModel *model) ++{ ++ const X86CPUVersionDefinition *vdef; ++ X86CPUVersion version = x86_cpu_model_resolve_version(model); ++ const CPUCaches *cache_info = model->cpudef->cache_info; ++ ++ if (version == CPU_VERSION_LEGACY) { ++ return cache_info; ++ } ++ ++ for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { ++ if (vdef->cache_info) { ++ cache_info = vdef->cache_info; ++ } ++ ++ if (vdef->version == version) { ++ break; ++ } ++ } ++ ++ assert(vdef->version == version); ++ return cache_info; ++} ++ + /* + * Load data from X86CPUDefinition into a X86CPU object. + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
+@@ -5245,7 +5271,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) + } + + /* legacy-cache defaults to 'off' if CPU model provides cache info */ +- cpu->legacy_cache = !def->cache_info; ++ cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); + + env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; + +@@ -6724,14 +6750,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + + /* Cache information initialization */ + if (!cpu->legacy_cache) { +- if (!xcc->model || !xcc->model->cpudef->cache_info) { ++ const CPUCaches *cache_info = ++ x86_cpu_get_versioned_cache_info(cpu, xcc->model); ++ ++ if (!xcc->model || !cache_info) { + g_autofree char *name = x86_cpu_class_get_model_name(xcc); + error_setg(errp, + "CPU model '%s' doesn't support legacy-cache=off", name); + return; + } + env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = +- *xcc->model->cpudef->cache_info; ++ *cache_info; + } else { + /* Build legacy cache information */ + env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch b/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch deleted file mode 100644 index 72ae8ee..0000000 --- a/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 54d3e58aabf9716f9a07aeb7044d7b7997e28123 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 31 Jan 2023 09:48:03 +0100 -Subject: [PATCH 5/8] target/i386: fix ADOX followed by ADCX - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [5/7] 64dbe4e602f08e4a88fdeacee5a8993ca4383563 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -When ADCX is followed by ADOX or vice versa, the second 
instruction's -carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX -operation. Retrieving the carry from EFLAGS is handled by this bit -of gen_ADCOX: - - tcg_gen_extract_tl(carry_in, cpu_cc_src, - ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1); - -Unfortunately, in this case cc_op has been overwritten by the previous -"if" statement to CC_OP_ADCOX. This works by chance when the first -instruction is ADCX; however, if the first instruction is ADOX, -ADCX will incorrectly take its carry from OF instead of CF. - -Fix by moving the computation of the new cc_op at the end of the function. -The included exhaustive test case fails without this patch and passes -afterwards. - -Because ADCX/ADOX need not be invoked through the VEX prefix, this -regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement -0x0f 0x38, add AVX", 2022-10-18). However, the mistake happened a -little earlier, when BMI instructions were rewritten using the new -decoder framework. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471 -Reported-by: Paul Jolly -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -(cherry picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6) ---- - target/i386/tcg/emit.c.inc | 20 +++++---- - tests/tcg/i386/Makefile.target | 6 ++- - tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++ - 3 files changed, 91 insertions(+), 10 deletions(-) - create mode 100644 tests/tcg/i386/test-i386-adcox.c - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 4d7702c106..0d7c6e80ae 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq) - - static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - { -+ int opposite_cc_op; - TCGv carry_in = NULL; - TCGv carry_out = (cc_op == CC_OP_ADCX ? 
cpu_cc_dst : cpu_cc_src2); - TCGv zero; -@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) { - /* Re-use the carry-out from a previous round. */ - carry_in = carry_out; -- cc_op = s->cc_op; -- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) { -- /* Merge with the carry-out from the opposite instruction. */ -- cc_op = CC_OP_ADCOX; -- } -- -- /* If we don't have a carry-in, get it out of EFLAGS. */ -- if (!carry_in) { -+ } else { -+ /* We don't have a carry-in, get it out of EFLAGS. */ - if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { - gen_compute_eflags(s); - } -@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); - break; - } -- set_cc_op(s, cc_op); -+ -+ opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX; -+ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) { -+ /* Merge with the carry-out from the opposite instruction. 
*/ -+ set_cc_op(s, CC_OP_ADCOX); -+ } else { -+ set_cc_op(s, cc_op); -+ } - } - - static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) -diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target -index 81831cafbc..bafd8c2180 100644 ---- a/tests/tcg/i386/Makefile.target -+++ b/tests/tcg/i386/Makefile.target -@@ -14,7 +14,7 @@ config-cc.mak: Makefile - I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c)) - ALL_X86_TESTS=$(I386_SRCS:.c=) - SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx --X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) -+X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) - - test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse - run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max -@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2 - run-test-i386-bmi2: QEMU_OPTS += -cpu max - run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max - -+test-i386-adcox: CFLAGS=-O2 -+run-test-i386-adcox: QEMU_OPTS += -cpu max -+run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max -+ - # - # hello-i386 is a barebones app - # -diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c -new file mode 100644 -index 0000000000..16169efff8 ---- /dev/null -+++ b/tests/tcg/i386/test-i386-adcox.c -@@ -0,0 +1,75 @@ -+/* See if various BMI2 instructions give expected results */ -+#include -+#include -+#include -+ -+#define CC_C 1 -+#define CC_O (1 << 11) -+ -+#ifdef __x86_64__ -+#define REG uint64_t -+#else -+#define REG uint32_t -+#endif -+ -+void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) -+{ -+ REG flags; -+ REG out_adcx, out_adox; -+ -+ asm("pushf; pop %0" : "=r"(flags)); -+ flags &= ~(CC_C | CC_O); -+ flags |= (in_c ? CC_C : 0); -+ flags |= (in_o ? 
CC_O : 0); -+ -+ out_adcx = adcx_operand; -+ out_adox = adox_operand; -+ asm("push %0; popf;" -+ "adox %3, %2;" -+ "adcx %3, %1;" -+ "pushf; pop %0" -+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) -+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); -+ -+ assert(out_adcx == in_c + adcx_operand - 1); -+ assert(out_adox == in_o + adox_operand - 1); -+ assert(!!(flags & CC_C) == (in_c || adcx_operand)); -+ assert(!!(flags & CC_O) == (in_o || adox_operand)); -+} -+ -+void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) -+{ -+ REG flags; -+ REG out_adcx, out_adox; -+ -+ asm("pushf; pop %0" : "=r"(flags)); -+ flags &= ~(CC_C | CC_O); -+ flags |= (in_c ? CC_C : 0); -+ flags |= (in_o ? CC_O : 0); -+ -+ out_adcx = adcx_operand; -+ out_adox = adox_operand; -+ asm("push %0; popf;" -+ "adcx %3, %1;" -+ "adox %3, %2;" -+ "pushf; pop %0" -+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) -+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); -+ -+ assert(out_adcx == in_c + adcx_operand - 1); -+ assert(out_adox == in_o + adox_operand - 1); -+ assert(!!(flags & CC_C) == (in_c || adcx_operand)); -+ assert(!!(flags & CC_O) == (in_o || adox_operand)); -+} -+ -+int main(int argc, char *argv[]) { -+ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */ -+ int i; -+ for (i = 0; i <= 15; i++) { -+ printf("%d\n", i); -+ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); -+ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); -+ } -+ return 0; -+} -+ --- -2.39.1 - diff --git a/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch b/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch deleted file mode 100644 index 81a0003..0000000 --- a/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch +++ /dev/null @@ -1,77 +0,0 @@ -From f4ddcdd2395e0944c20f6683c66068ed0ac7d757 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Sat, 7 Jan 2023 
18:14:20 +0100 -Subject: [PATCH 1/8] target/i386: fix operand size of unary SSE operations - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [1/7] 7041f3e30e19add6bd8e5355d8bebf92390a5c2e (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -VRCPSS, VRSQRTSS and VCVTSx2Sx have a 32-bit or 64-bit memory operand, -which is represented in the decoding tables by X86_VEX_REPScalar. Add it -to the tables, and make validate_vex() handle the case of an instruction -that is in exception type 4 without the REP prefix and exception type 5 -with it; this is the cas of VRCP and VRSQRT. - -Reported-by: yongwoo -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1377 -Signed-off-by: Paolo Bonzini -(cherry picked from commit 3d304620ec6c95f31db17acc132f42f243369299) ---- - target/i386/tcg/decode-new.c.inc | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc -index 80c579164f..d5fd8d965c 100644 ---- a/target/i386/tcg/decode-new.c.inc -+++ b/target/i386/tcg/decode-new.c.inc -@@ -105,6 +105,7 @@ - #define vex3 .vex_class = 3, - #define vex4 .vex_class = 4, - #define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned, -+#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar, - #define vex5 .vex_class = 5, - #define vex6 .vex_class = 6, - #define vex7 .vex_class = 7, -@@ -839,8 +840,8 @@ static const X86OpEntry opcodes_0F[256] = { - - [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66), - [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -- [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), -- [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), -+ [0x52] 
= X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), -+ [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), - [0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */ - [0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */ - [0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */ -@@ -878,7 +879,7 @@ static const X86OpEntry opcodes_0F[256] = { - - [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -- [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex3 p_00_66_f3_f2), -+ [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x5b] = X86_OP_GROUP0(0F5B), - [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -@@ -1447,9 +1448,9 @@ static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) - * Instructions which differ between 00/66 and F2/F3 in the - * exception classification and the size of the memory operand. - */ -- assert(e->vex_class == 1 || e->vex_class == 2); -+ assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4); - if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { -- e->vex_class = 3; -+ e->vex_class = e->vex_class < 4 ? 
3 : 5; - if (s->vex_l) { - goto illegal; - } --- -2.39.1 - diff --git a/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch b/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch deleted file mode 100644 index b9536c3..0000000 --- a/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b330bf0a2ad5af73d3c62997f7f0fa5b61f1796b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 14 Feb 2023 14:48:37 +0100 -Subject: [PATCH 8/8] target/s390x/arch_dump: Fix memory corruption in - s390x_write_elf64_notes() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 152: Fix memory corruption in s390x_write_elf64_notes() -RH-Bugzilla: 2168172 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cédric Le Goater -RH-Commit: [1/1] 37a2c997b2c8b7524e0b6299891bf3ea7c9a46d0 (thuth/qemu-kvm-cs9) - -Bugzilla: https://bugzilla.redhat.com/2168172 -Upstream-Status: Posted (and reviewed, but not merged yet) - -"note_size" can be smaller than sizeof(note), so unconditionally calling -memset(notep, 0, sizeof(note)) could cause a memory corruption here in -case notep has been allocated dynamically, thus let's use note_size as -length argument for memset() instead. 
- -Fixes: 113d8f4e95 ("s390x: pv: Add dump support") -Message-Id: <20230214141056.680969-1-thuth@redhat.com> -Reviewed-by: Janosch Frank -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth ---- - target/s390x/arch_dump.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c -index a2329141e8..a7c44ba49d 100644 ---- a/target/s390x/arch_dump.c -+++ b/target/s390x/arch_dump.c -@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name, - notep = g_malloc(note_size); - } - -- memset(notep, 0, sizeof(note)); -+ memset(notep, 0, note_size); - - /* Setup note header data */ - notep->hdr.n_descsz = cpu_to_be32(content_size); --- -2.31.1 - diff --git a/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch b/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch deleted file mode 100644 index 268c263..0000000 --- a/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 093c4a6834f3ec5a05390a3630ae4edec80885b8 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:57 +0100 -Subject: [PATCH 15/31] test-bdrv-drain: Don't yield in - .bdrv_co_drained_begin/end() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [3/16] 5282d3e13cb85dfb480edb11b7eb2769248465df (sgarzarella/qemu-kvm-c-9-s) - -We want to change .bdrv_co_drained_begin/end() back to be non-coroutine -callbacks, so in preparation, avoid yielding in their implementation. - -This does almost the same as the existing logic in bdrv_drain_invoke(), -by creating and entering coroutines internally. 
However, since the test -case is by far the heaviest user of coroutine code in drain callbacks, -it is preferable to have the complexity in the test case rather than the -drain core, which is already complicated enough without this. - -The behaviour for bdrv_drain_begin() is unchanged because we increase -bs->in_flight and this is still polled. However, bdrv_drain_end() -doesn't wait for the spawned coroutine to complete any more. This is -fine, we don't rely on bdrv_drain_end() restarting all operations -immediately before the next aio_poll(). - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7bce1c299834557bffd92294608ea528648cfe75) -Signed-off-by: Stefano Garzarella ---- - tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++---------- - 1 file changed, 46 insertions(+), 18 deletions(-) - -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 09dc4a4891..24f34e24ad 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -38,12 +38,22 @@ typedef struct BDRVTestState { - bool sleep_in_drain_begin; - } BDRVTestState; - -+static void coroutine_fn sleep_in_drain_begin(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ -+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); -+ bdrv_dec_in_flight(bs); -+} -+ - static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count++; - if (s->sleep_in_drain_begin) { -- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); -+ Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - -@@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, - return 0; - } - -+static void coroutine_fn 
bdrv_replace_test_drain_co(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ BDRVReplaceTestState *s = bs->opaque; -+ -+ /* Keep waking io_co up until it is done */ -+ while (s->io_co) { -+ aio_co_wake(s->io_co); -+ s->io_co = NULL; -+ qemu_coroutine_yield(); -+ } -+ s->drain_co = NULL; -+ bdrv_dec_in_flight(bs); -+} -+ - /** - * If .drain_count is 0, wake up .io_co if there is one; and set - * .was_drained. -@@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) - BDRVReplaceTestState *s = bs->opaque; - - if (!s->drain_count) { -- /* Keep waking io_co up until it is done */ -- s->drain_co = qemu_coroutine_self(); -- while (s->io_co) { -- aio_co_wake(s->io_co); -- s->io_co = NULL; -- qemu_coroutine_yield(); -- } -- s->drain_co = NULL; -- -+ s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), s->drain_co); - s->was_drained = true; - } - s->drain_count++; - } - -+static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ char data; -+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); -+ int ret; -+ -+ /* Queue a read request post-drain */ -+ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); -+ g_assert(ret >= 0); -+ bdrv_dec_in_flight(bs); -+} -+ - /** - * Reduce .drain_count, set .was_undrained once it reaches 0. 
- * If .drain_count reaches 0 and the node has a backing file, issue a -@@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) - - g_assert(s->drain_count > 0); - if (!--s->drain_count) { -- int ret; -- - s->was_undrained = true; - - if (bs->backing) { -- char data; -- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); -- -- /* Queue a read request post-drain */ -- ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); -- g_assert(ret >= 0); -+ Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry, -+ bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - } --- -2.31.1 - diff --git a/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch b/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch deleted file mode 100644 index ebd52cd..0000000 --- a/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch +++ /dev/null @@ -1,505 +0,0 @@ -From 39d5761fe1f546e764dedf2ea32c55d8f5222696 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 18 Jan 2023 13:04:05 +0100 -Subject: [PATCH 1/8] tests/qtest: netdev: test stream and dgram backends -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect -RH-Bugzilla: 2169232 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Cindy Lu -RH-Acked-by: MST -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [1/2] 75c71b47eea072e14651a96612d402b50d2b8f1e (lvivier/qemu-kvm-centos) - -Signed-off-by: Laurent Vivier -Acked-by: Michael S. 
Tsirkin -Message-Id: <20230118120405.1876329-1-lvivier@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit c95031a19f0d7f418a597243f6f84b031a858997) ---- - tests/qtest/meson.build | 2 + - tests/qtest/netdev-socket.c | 448 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 450 insertions(+) - create mode 100644 tests/qtest/netdev-socket.c - -diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 9df3f9f8b9..2e7c6fe5e3 100644 ---- a/tests/qtest/meson.build -+++ b/tests/qtest/meson.build -@@ -27,6 +27,7 @@ qtests_generic = [ - 'test-hmp', - 'qos-test', - 'readconfig-test', -+ 'netdev-socket', - ] - if config_host.has_key('CONFIG_MODULES') - qtests_generic += [ 'modules-test' ] -@@ -299,6 +300,7 @@ qtests = { - 'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'], - 'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'], - 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), -+ 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'), - } - - gvnc = dependency('gvnc-1.0', required: false) -diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c -new file mode 100644 -index 0000000000..6ba256e173 ---- /dev/null -+++ b/tests/qtest/netdev-socket.c -@@ -0,0 +1,448 @@ -+/* -+ * QTest testcase for netdev stream and dgram -+ * -+ * Copyright (c) 2022 Red Hat, Inc. 
-+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/sockets.h" -+#include -+#include "../unit/socket-helpers.h" -+#include "libqtest.h" -+ -+#define CONNECTION_TIMEOUT 5 -+ -+#define EXPECT_STATE(q, e, t) \ -+do { \ -+ char *resp = NULL; \ -+ g_test_timer_start(); \ -+ do { \ -+ g_free(resp); \ -+ resp = qtest_hmp(q, "info network"); \ -+ if (t) { \ -+ strrchr(resp, t)[0] = 0; \ -+ } \ -+ if (g_str_equal(resp, e)) { \ -+ break; \ -+ } \ -+ } while (g_test_timer_elapsed() < CONNECTION_TIMEOUT); \ -+ g_assert_cmpstr(resp, ==, e); \ -+ g_free(resp); \ -+} while (0) -+ -+static gchar *tmpdir; -+ -+static int inet_get_free_port_socket_ipv4(int sock) -+{ -+ struct sockaddr_in addr; -+ socklen_t len; -+ -+ memset(&addr, 0, sizeof(addr)); -+ addr.sin_family = AF_INET; -+ addr.sin_addr.s_addr = INADDR_ANY; -+ addr.sin_port = 0; -+ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { -+ return -1; -+ } -+ -+ len = sizeof(addr); -+ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { -+ return -1; -+ } -+ -+ return ntohs(addr.sin_port); -+} -+ -+static int inet_get_free_port_socket_ipv6(int sock) -+{ -+ struct sockaddr_in6 addr; -+ socklen_t len; -+ -+ memset(&addr, 0, sizeof(addr)); -+ addr.sin6_family = AF_INET6; -+ addr.sin6_addr = in6addr_any; -+ addr.sin6_port = 0; -+ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { -+ return -1; -+ } -+ -+ len = sizeof(addr); -+ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { -+ return -1; -+ } -+ -+ return ntohs(addr.sin6_port); -+} -+ -+static int inet_get_free_port_multiple(int nb, int *port, bool ipv6) -+{ -+ int sock[nb]; -+ int i; -+ -+ for (i = 0; i < nb; i++) { -+ sock[i] = socket(ipv6 ? AF_INET6 : AF_INET, SOCK_STREAM, 0); -+ if (sock[i] < 0) { -+ break; -+ } -+ port[i] = ipv6 ? 
inet_get_free_port_socket_ipv6(sock[i]) : -+ inet_get_free_port_socket_ipv4(sock[i]); -+ if (port[i] == -1) { -+ break; -+ } -+ } -+ -+ nb = i; -+ for (i = 0; i < nb; i++) { -+ closesocket(sock[i]); -+ } -+ -+ return nb; -+} -+ -+static int inet_get_free_port(bool ipv6) -+{ -+ int nb, port; -+ -+ nb = inet_get_free_port_multiple(1, &port, ipv6); -+ g_assert_cmpint(nb, ==, 1); -+ -+ return port; -+} -+ -+static void test_stream_inet_ipv4(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port; -+ -+ port = inet_get_free_port(false); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,tcp:127.0.0.1:%d\r\n", -+ port); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ /* the port is unknown, check only the address */ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:127.0.0.1", ':'); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_stream_inet_ipv6(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port; -+ -+ port = inet_get_free_port(true); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=off,addr.ipv6=on," -+ "addr.host=::1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=off,addr.ipv6=on," -+ "addr.host=::1,addr.port=%d", port); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,tcp:::1:%d\r\n", -+ port); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ /* the 
port is unknown, check only the address */ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:::1", ':'); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_stream_unix(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path; -+ -+ path = g_strconcat(tmpdir, "/stream_unix", NULL); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true," -+ "addr.type=unix,addr.path=%s,", -+ path); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=false," -+ "addr.type=unix,addr.path=%s", -+ path); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); -+ EXPECT_STATE(qts1, expect, 0); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ g_free(path); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+#ifdef CONFIG_LINUX -+static void test_stream_unix_abstract(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path; -+ -+ path = g_strconcat(tmpdir, "/stream_unix_abstract", NULL); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true," -+ "addr.type=unix,addr.path=%s," -+ "addr.abstract=on", -+ path); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=false," -+ "addr.type=unix,addr.path=%s,addr.abstract=on", -+ path); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); -+ EXPECT_STATE(qts1, expect, 0); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ g_free(path); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+#endif -+ -+#ifndef _WIN32 -+static void test_stream_fd(void) -+{ -+ QTestState *qts0, *qts1; -+ int sock[2]; -+ int ret; -+ -+ ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, sock); -+ g_assert_true(ret == 0); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", -+ sock[0]); -+ 
-+ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", -+ sock[1]); -+ -+ EXPECT_STATE(qts1, "st0: index=0,type=stream,unix:\r\n", 0); -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+ -+ closesocket(sock[0]); -+ closesocket(sock[1]); -+} -+#endif -+ -+static void test_dgram_inet(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port[2]; -+ int nb; -+ -+ nb = inet_get_free_port_multiple(2, port, false); -+ g_assert_cmpint(nb, ==, 2); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "local.type=inet,local.host=127.0.0.1,local.port=%d," -+ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", -+ port[0], port[1]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram," -+ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", -+ port[0], port[1]); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "local.type=inet,local.host=127.0.0.1,local.port=%d," -+ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", -+ port[1], port[0]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram," -+ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", -+ port[1], port[0]); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+#ifndef _WIN32 -+static void test_dgram_mcast(void) -+{ -+ QTestState *qts; -+ -+ qts = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "remote.type=inet,remote.host=230.0.0.1,remote.port=1234"); -+ -+ EXPECT_STATE(qts, "st0: index=0,type=dgram,mcast=230.0.0.1:1234\r\n", 0); -+ -+ qtest_quit(qts); -+} -+ -+static void test_dgram_unix(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path0, *path1; -+ -+ path0 = g_strconcat(tmpdir, "/dgram_unix0", NULL); -+ path1 = g_strconcat(tmpdir, "/dgram_unix1", NULL); -+ -+ 
qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=unix,local.path=%s," -+ "remote.type=unix,remote.path=%s", -+ path0, path1); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", -+ path0, path1); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=unix,local.path=%s," -+ "remote.type=unix,remote.path=%s", -+ path1, path0); -+ -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", -+ path1, path0); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ unlink(path0); -+ g_free(path0); -+ unlink(path1); -+ g_free(path1); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_dgram_fd(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int ret; -+ int sv[2]; -+ -+ ret = socketpair(PF_UNIX, SOCK_DGRAM, 0, sv); -+ g_assert_cmpint(ret, !=, -1); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=fd,local.str=%d", -+ sv[0]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[0]); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=fd,local.str=%d", -+ sv[1]); -+ -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[1]); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+ -+ closesocket(sv[0]); -+ closesocket(sv[1]); -+} -+#endif -+ -+int main(int argc, char **argv) -+{ -+ int ret; -+ bool has_ipv4, has_ipv6, has_afunix; -+ g_autoptr(GError) err = NULL; -+ -+ socket_init(); -+ g_test_init(&argc, &argv, NULL); -+ -+ if (socket_check_protocol_support(&has_ipv4, &has_ipv6) < 0) { -+ g_error("socket_check_protocol_support() failed\n"); -+ } -+ -+ tmpdir = g_dir_make_tmp("netdev-socket.XXXXXX", &err); -+ if (tmpdir == NULL) { -+ g_error("Can't create temporary directory in %s: %s", -+ 
g_get_tmp_dir(), err->message); -+ } -+ -+ if (has_ipv4) { -+ qtest_add_func("/netdev/stream/inet/ipv4", test_stream_inet_ipv4); -+ qtest_add_func("/netdev/dgram/inet", test_dgram_inet); -+#ifndef _WIN32 -+ qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); -+#endif -+ } -+ if (has_ipv6) { -+ qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); -+ } -+ -+ socket_check_afunix_support(&has_afunix); -+ if (has_afunix) { -+#ifndef _WIN32 -+ qtest_add_func("/netdev/dgram/unix", test_dgram_unix); -+#endif -+ qtest_add_func("/netdev/stream/unix", test_stream_unix); -+#ifdef CONFIG_LINUX -+ qtest_add_func("/netdev/stream/unix/abstract", -+ test_stream_unix_abstract); -+#endif -+#ifndef _WIN32 -+ qtest_add_func("/netdev/stream/fd", test_stream_fd); -+ qtest_add_func("/netdev/dgram/fd", test_dgram_fd); -+#endif -+ } -+ -+ ret = g_test_run(); -+ -+ g_rmdir(tmpdir); -+ g_free(tmpdir); -+ -+ return ret; -+} --- -2.31.1 - diff --git a/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch b/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch deleted file mode 100644 index 14388fe..0000000 --- a/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch +++ /dev/null @@ -1,299 +0,0 @@ -From 120db3dfeb88c447f0e115c19b7ede704f8f80cb Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:05:41 -1000 -Subject: [PATCH 2/8] tests/tcg/i386: Introduce and use reg_t consistently -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [2/7] 843a677555414170392db21c828bef3dc3c29300 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -Define reg_t based on the actual register 
width. -Define the inlines using that type. This will allow -input registers to 32-bit insns to be set to 64-bit -values on x86-64, which allows testing various edge cases. - -Signed-off-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20230114230542.3116013-2-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 5d62d6649cd367b5b4a3676e7514d2f9ca86cb03) ---- - tests/tcg/i386/test-i386-bmi2.c | 182 ++++++++++++++++---------------- - 1 file changed, 93 insertions(+), 89 deletions(-) - -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 5fadf47510..3c3ef85513 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -3,34 +3,40 @@ - #include - #include - -+#ifdef __x86_64 -+typedef uint64_t reg_t; -+#else -+typedef uint32_t reg_t; -+#endif -+ - #define insn1q(name, arg0) \ --static inline uint64_t name##q(uint64_t arg0) \ -+static inline reg_t name##q(reg_t arg0) \ - { \ -- uint64_t result64; \ -+ reg_t result64; \ - asm volatile (#name "q %1, %0" : "=r"(result64) : "rm"(arg0)); \ - return result64; \ - } - - #define insn1l(name, arg0) \ --static inline uint32_t name##l(uint32_t arg0) \ -+static inline reg_t name##l(reg_t arg0) \ - { \ -- uint32_t result32; \ -+ reg_t result32; \ - asm volatile (#name "l %k1, %k0" : "=r"(result32) : "rm"(arg0)); \ - return result32; \ - } - - #define insn2q(name, arg0, c0, arg1, c1) \ --static inline uint64_t name##q(uint64_t arg0, uint64_t arg1) \ -+static inline reg_t name##q(reg_t arg0, reg_t arg1) \ - { \ -- uint64_t result64; \ -+ reg_t result64; \ - asm volatile (#name "q %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1)); \ - return result64; \ - } - - #define insn2l(name, arg0, c0, arg1, c1) \ --static inline uint32_t name##l(uint32_t arg0, uint32_t arg1) \ -+static inline reg_t name##l(reg_t arg0, reg_t arg1) \ - { \ -- uint32_t result32; \ -+ reg_t result32; \ - asm volatile (#name "l %k2, %k1, %k0" : 
"=r"(result32) : c0(arg0), c1(arg1)); \ - return result32; \ - } -@@ -65,130 +71,128 @@ insn1l(blsr, src) - int main(int argc, char *argv[]) { - uint64_t ehlo = 0x202020204f4c4845ull; - uint64_t mask = 0xa080800302020001ull; -- uint32_t result32; -+ reg_t result; - - #ifdef __x86_64 -- uint64_t result64; -- - /* 64 bits */ -- result64 = andnq(mask, ehlo); -- assert(result64 == 0x002020204d4c4844); -+ result = andnq(mask, ehlo); -+ assert(result == 0x002020204d4c4844); - -- result64 = pextq(ehlo, mask); -- assert(result64 == 133); -+ result = pextq(ehlo, mask); -+ assert(result == 133); - -- result64 = pdepq(result64, mask); -- assert(result64 == (ehlo & mask)); -+ result = pdepq(result, mask); -+ assert(result == (ehlo & mask)); - -- result64 = pextq(-1ull, mask); -- assert(result64 == 511); /* mask has 9 bits set */ -+ result = pextq(-1ull, mask); -+ assert(result == 511); /* mask has 9 bits set */ - -- result64 = pdepq(-1ull, mask); -- assert(result64 == mask); -+ result = pdepq(-1ull, mask); -+ assert(result == mask); - -- result64 = bextrq(mask, 0x3f00); -- assert(result64 == (mask & ~INT64_MIN)); -+ result = bextrq(mask, 0x3f00); -+ assert(result == (mask & ~INT64_MIN)); - -- result64 = bextrq(mask, 0x1038); -- assert(result64 == 0xa0); -+ result = bextrq(mask, 0x1038); -+ assert(result == 0xa0); - -- result64 = bextrq(mask, 0x10f8); -- assert(result64 == 0); -+ result = bextrq(mask, 0x10f8); -+ assert(result == 0); - -- result64 = blsiq(0x30); -- assert(result64 == 0x10); -+ result = blsiq(0x30); -+ assert(result == 0x10); - -- result64 = blsiq(0x30ull << 32); -- assert(result64 == 0x10ull << 32); -+ result = blsiq(0x30ull << 32); -+ assert(result == 0x10ull << 32); - -- result64 = blsmskq(0x30); -- assert(result64 == 0x1f); -+ result = blsmskq(0x30); -+ assert(result == 0x1f); - -- result64 = blsrq(0x30); -- assert(result64 == 0x20); -+ result = blsrq(0x30); -+ assert(result == 0x20); - -- result64 = blsrq(0x30ull << 32); -- assert(result64 == 0x20ull << 
32); -+ result = blsrq(0x30ull << 32); -+ assert(result == 0x20ull << 32); - -- result64 = bzhiq(mask, 0x3f); -- assert(result64 == (mask & ~INT64_MIN)); -+ result = bzhiq(mask, 0x3f); -+ assert(result == (mask & ~INT64_MIN)); - -- result64 = bzhiq(mask, 0x1f); -- assert(result64 == (mask & ~(-1 << 30))); -+ result = bzhiq(mask, 0x1f); -+ assert(result == (mask & ~(-1 << 30))); - -- result64 = rorxq(0x2132435465768798, 8); -- assert(result64 == 0x9821324354657687); -+ result = rorxq(0x2132435465768798, 8); -+ assert(result == 0x9821324354657687); - -- result64 = sarxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0xffffeeddccbbaa99); -+ result = sarxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0xffffeeddccbbaa99); - -- result64 = sarxq(0x77eeddccbbaa9988, 8 | 64); -- assert(result64 == 0x0077eeddccbbaa99); -+ result = sarxq(0x77eeddccbbaa9988, 8 | 64); -+ assert(result == 0x0077eeddccbbaa99); - -- result64 = shrxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0x00ffeeddccbbaa99); -+ result = shrxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0x00ffeeddccbbaa99); - -- result64 = shrxq(0x77eeddccbbaa9988, 8 | 192); -- assert(result64 == 0x0077eeddccbbaa99); -+ result = shrxq(0x77eeddccbbaa9988, 8 | 192); -+ assert(result == 0x0077eeddccbbaa99); - -- result64 = shlxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0xeeddccbbaa998800); -+ result = shlxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0xeeddccbbaa998800); - #endif - - /* 32 bits */ -- result32 = andnl(mask, ehlo); -- assert(result32 == 0x04d4c4844); -+ result = andnl(mask, ehlo); -+ assert(result == 0x04d4c4844); - -- result32 = pextl((uint32_t) ehlo, mask); -- assert(result32 == 5); -+ result = pextl((uint32_t) ehlo, mask); -+ assert(result == 5); - -- result32 = pdepl(result32, mask); -- assert(result32 == (uint32_t)(ehlo & mask)); -+ result = pdepl(result, mask); -+ assert(result == (uint32_t)(ehlo & mask)); - -- result32 = pextl(-1u, mask); -- assert(result32 == 7); /* mask has 3 bits set */ -+ result = 
pextl(-1u, mask); -+ assert(result == 7); /* mask has 3 bits set */ - -- result32 = pdepl(-1u, mask); -- assert(result32 == (uint32_t)mask); -+ result = pdepl(-1u, mask); -+ assert(result == (uint32_t)mask); - -- result32 = bextrl(mask, 0x1f00); -- assert(result32 == (mask & ~INT32_MIN)); -+ result = bextrl(mask, 0x1f00); -+ assert(result == (mask & ~INT32_MIN)); - -- result32 = bextrl(ehlo, 0x1018); -- assert(result32 == 0x4f); -+ result = bextrl(ehlo, 0x1018); -+ assert(result == 0x4f); - -- result32 = bextrl(mask, 0x1038); -- assert(result32 == 0); -+ result = bextrl(mask, 0x1038); -+ assert(result == 0); - -- result32 = blsil(0xffff); -- assert(result32 == 1); -+ result = blsil(0xffff); -+ assert(result == 1); - -- result32 = blsmskl(0x300); -- assert(result32 == 0x1ff); -+ result = blsmskl(0x300); -+ assert(result == 0x1ff); - -- result32 = blsrl(0xffc); -- assert(result32 == 0xff8); -+ result = blsrl(0xffc); -+ assert(result == 0xff8); - -- result32 = bzhil(mask, 0xf); -- assert(result32 == 1); -+ result = bzhil(mask, 0xf); -+ assert(result == 1); - -- result32 = rorxl(0x65768798, 8); -- assert(result32 == 0x98657687); -+ result = rorxl(0x65768798, 8); -+ assert(result == 0x98657687); - -- result32 = sarxl(0xffeeddcc, 8); -- assert(result32 == 0xffffeedd); -+ result = sarxl(0xffeeddcc, 8); -+ assert(result == 0xffffeedd); - -- result32 = sarxl(0x77eeddcc, 8 | 32); -- assert(result32 == 0x0077eedd); -+ result = sarxl(0x77eeddcc, 8 | 32); -+ assert(result == 0x0077eedd); - -- result32 = shrxl(0xffeeddcc, 8); -- assert(result32 == 0x00ffeedd); -+ result = shrxl(0xffeeddcc, 8); -+ assert(result == 0x00ffeedd); - -- result32 = shrxl(0x77eeddcc, 8 | 128); -- assert(result32 == 0x0077eedd); -+ result = shrxl(0x77eeddcc, 8 | 128); -+ assert(result == 0x0077eedd); - -- result32 = shlxl(0xffeeddcc, 8); -- assert(result32 == 0xeeddcc00); -+ result = shlxl(0xffeeddcc, 8); -+ assert(result == 0xeeddcc00); - - return 0; - } --- -2.39.1 - diff --git 
a/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch b/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch new file mode 100644 index 0000000..ef99b30 --- /dev/null +++ b/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch @@ -0,0 +1,88 @@ +From b998f8474846886fa1e0428fe79fe2a79231cc05 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 12 May 2023 15:43:38 +0100 +Subject: [PATCH 35/37] ui: Fix pixel colour channel order for PNG screenshots +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 183: ui: Fix pixel colour channel order for PNG screenshots +RH-Bugzilla: 2222579 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 76acd3c5526639e70bc2998f584503c78fc9bc56 (marcandre.lureau-rh/qemu-kvm-centos) + +When we take a PNG screenshot the ordering of the colour channels in +the data is not correct, resulting in the image having weird +colouring compared to the actual display. (Specifically, on a +little-endian host the blue and red channels are swapped; on +big-endian everything is wrong.) + +This happens because the pixman idea of the pixel data and the libpng +idea differ. PIXMAN_a8r8g8b8 defines that pixels are 32-bit values, +with A in bits 24-31, R in bits 16-23, G in bits 8-15 and B in bits +0-7. This means that on little-endian systems the bytes in memory +are + B G R A +and on big-endian systems they are + A R G B + +libpng, on the other hand, thinks of pixels as being a series of +values for each channel, so its format PNG_COLOR_TYPE_RGB_ALPHA +always wants bytes in the order + R G B A + +This isn't the same as the pixman order for either big or little +endian hosts. + +The alpha channel is also unnecessary bulk in the output PNG file, +because there is no alpha information in a screenshot. 
+ +To handle the endianness issue, we already define in ui/qemu-pixman.h +various PIXMAN_BE_* and PIXMAN_LE_* values that give consistent +byte-order pixel channel formats. So we can use PIXMAN_BE_r8g8b8 and +PNG_COLOR_TYPE_RGB, which both have an in-memory byte order of + R G B +and 3 bytes per pixel. + +(PPM format screenshots get this right; they already use the +PIXMAN_BE_r8g8b8 format.) + +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1622 +Fixes: 9a0a119a382867 ("Added parameter to take screenshot with screendump as PNG") +Signed-off-by: Peter Maydell +Reviewed-by: Marc-André Lureau +Message-id: 20230502135548.2451309-1-peter.maydell@linaro.org + +(cherry picked from commit cd22a0f520f471e3bd33bc19cf3b2fa772cdb2a8) +Signed-off-by: Marc-André Lureau +--- + ui/console.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/ui/console.c b/ui/console.c +index 6e8a3cdc62..e173731e20 100644 +--- a/ui/console.c ++++ b/ui/console.c +@@ -311,7 +311,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) + png_struct *png_ptr; + png_info *info_ptr; + g_autoptr(pixman_image_t) linebuf = +- qemu_pixman_linebuf_create(PIXMAN_a8r8g8b8, width); ++ qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width); + uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf); + FILE *f = fdopen(fd, "wb"); + int y; +@@ -341,7 +341,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) + png_init_io(png_ptr, f); + + png_set_IHDR(png_ptr, info_ptr, width, height, 8, +- PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, ++ PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + + png_write_info(png_ptr, info_ptr); +-- +2.39.3 + diff --git a/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch b/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch new file mode 100644 index 0000000..8c468d8 --- /dev/null +++ 
b/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch @@ -0,0 +1,180 @@ +From c1502b0cd16378d6d5bd4259b90bf81a5fb5aad3 Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Fri, 5 May 2023 14:00:51 +0200 +Subject: [PATCH 20/21] util/async-teardown: wire up query-command-line-options +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests +RH-Bugzilla: 2168500 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/2] 76e5f25df2c02721f5a29f552ee3061be589abb2 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 + +Add new -run-with option with an async-teardown=on|off parameter. It is +visible in the output of query-command-line-options QMP command, so it +can be discovered and used by libvirt. + +The option -async-teardown is now redundant, deprecate it. + +Reported-by: Boris Fiuczynski +Fixes: c891c24b1a ("os-posix: asynchronous teardown for shutdown on Linux") +Signed-off-by: Claudio Imbrenda +Message-Id: <20230505120051.36605-2-imbrenda@linux.ibm.com> +[thuth: Add curly braces to fix error with GCC 8.5, fix bug in deprecated.rst] +Signed-off-by: Thomas Huth + +(cherry picked from commit 80bd81cadd127c1e2fc784612a52abe392670ba4) +Conflicts: + docs/about/deprecated.rst (missing context from other patches) +Signed-off-by: Thomas Huth +--- + docs/about/deprecated.rst | 5 +++++ + os-posix.c | 14 ++++++++++++++ + qemu-options.hx | 34 +++++++++++++++++++++++----------- + util/async-teardown.c | 21 +++++++++++++++++++++ + 4 files changed, 63 insertions(+), 11 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index 1ca9dc33d6..52893fcf38 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -111,6 +111,11 @@ Use ``-machine acpi=off`` instead. 
+ The HAXM project has been retired (see https://github.com/intel/haxm#status). + Use "whpx" (on Windows) or "hvf" (on macOS) instead. + ++``-async-teardown`` (since 8.1) ++''''''''''''''''''''''''''''''' ++ ++Use ``-run-with async-teardown=on`` instead. ++ + + QEMU Machine Protocol (QMP) commands + ------------------------------------ +diff --git a/os-posix.c b/os-posix.c +index 5adc69f560..90ea71725f 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -36,6 +36,8 @@ + #include "qemu/log.h" + #include "sysemu/runstate.h" + #include "qemu/cutils.h" ++#include "qemu/config-file.h" ++#include "qemu/option.h" + + #ifdef CONFIG_LINUX + #include +@@ -152,9 +154,21 @@ int os_parse_cmd_args(int index, const char *optarg) + daemonize = 1; + break; + #if defined(CONFIG_LINUX) ++ /* deprecated */ + case QEMU_OPTION_asyncteardown: + init_async_teardown(); + break; ++ case QEMU_OPTION_run_with: { ++ QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), ++ optarg, false); ++ if (!opts) { ++ exit(1); ++ } ++ if (qemu_opt_get_bool(opts, "async-teardown", false)) { ++ init_async_teardown(); ++ } ++ break; ++ } + #endif + default: + return -1; +diff --git a/qemu-options.hx b/qemu-options.hx +index 52b49f1f6a..b18f933703 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -4766,20 +4766,32 @@ DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL) + DEF("async-teardown", 0, QEMU_OPTION_asyncteardown, + "-async-teardown enable asynchronous teardown\n", + QEMU_ARCH_ALL) +-#endif + SRST + ``-async-teardown`` +- Enable asynchronous teardown. A new process called "cleanup/" +- will be created at startup sharing the address space with the main qemu +- process, using clone. It will wait for the main qemu process to +- terminate completely, and then exit. +- This allows qemu to terminate very quickly even if the guest was +- huge, leaving the teardown of the address space to the cleanup +- process. 
Since the cleanup process shares the same cgroups as the +- main qemu process, accounting is performed correctly. This only +- works if the cleanup process is not forcefully killed with SIGKILL +- before the main qemu process has terminated completely. ++ This option is deprecated and should no longer be used. The new option ++ ``-run-with async-teardown=on`` is a replacement. + ERST ++DEF("run-with", HAS_ARG, QEMU_OPTION_run_with, ++ "-run-with async-teardown[=on|off]\n" ++ " misc QEMU process lifecycle options\n" ++ " async-teardown=on enables asynchronous teardown\n", ++ QEMU_ARCH_ALL) ++SRST ++``-run-with`` ++ Set QEMU process lifecycle options. ++ ++ ``async-teardown=on`` enables asynchronous teardown. A new process called ++ "cleanup/" will be created at startup sharing the address ++ space with the main QEMU process, using clone. It will wait for the ++ main QEMU process to terminate completely, and then exit. This allows ++ QEMU to terminate very quickly even if the guest was huge, leaving the ++ teardown of the address space to the cleanup process. Since the cleanup ++ process shares the same cgroups as the main QEMU process, accounting is ++ performed correctly. This only works if the cleanup process is not ++ forcefully killed with SIGKILL before the main QEMU process has ++ terminated completely. 
++ERST ++#endif + + DEF("msg", HAS_ARG, QEMU_OPTION_msg, + "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n" +diff --git a/util/async-teardown.c b/util/async-teardown.c +index 62cdeb0f20..3ab19c8740 100644 +--- a/util/async-teardown.c ++++ b/util/async-teardown.c +@@ -12,6 +12,9 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/config-file.h" ++#include "qemu/option.h" ++#include "qemu/module.h" + #include + #include + #include +@@ -144,3 +147,21 @@ void init_async_teardown(void) + clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); + sigprocmask(SIG_SETMASK, &old_signals, NULL); + } ++ ++static QemuOptsList qemu_run_with_opts = { ++ .name = "run-with", ++ .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), ++ .desc = { ++ { ++ .name = "async-teardown", ++ .type = QEMU_OPT_BOOL, ++ }, ++ { /* end of list */ } ++ }, ++}; ++ ++static void register_teardown(void) ++{ ++ qemu_add_opts(&qemu_run_with_opts); ++} ++opts_init(register_teardown); +-- +2.39.3 + diff --git a/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch b/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch new file mode 100644 index 0000000..fe68d18 --- /dev/null +++ b/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch @@ -0,0 +1,97 @@ +From 64652225695c23855cfb1252cea2b55c24da2260 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:15 +0200 +Subject: [PATCH 1/9] util/iov: Make qiov_slice() public + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/5] 9c3cd661f7139ce124ee4f4d5fcbeaf3dbb9c45c (hreitz/qemu-kvm-c-9-s) + +We want to inline qemu_iovec_init_extended() in block/io.c for padding +requests, and having access to qiov_slice() is useful for this. As a +public function, it is renamed to qemu_iovec_slice(). + +(We will need to count the number of I/O vector elements of a slice +there, and then later process this slice. 
Without qiov_slice(), we +would need to call qemu_iovec_subvec_niov(), and all further +IOV-processing functions may need to skip prefixing elements to +accommodate for a qiov_offset. Because qemu_iovec_subvec_niov() +internally calls qiov_slice(), we can just have the block/io.c code call +qiov_slice() itself, thus get the number of elements, and also create an +iovec array with the superfluous prefixing elements stripped, so the +following processing functions no longer need to skip them.) + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-2-hreitz@redhat.com> +(cherry picked from commit 3d06cea8256d54a6b0238934c31012f7f17100f5) +Signed-off-by: Hanna Czenczek +--- + include/qemu/iov.h | 3 +++ + util/iov.c | 14 +++++++------- + 2 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 9330746680..46fadfb27a 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -229,6 +229,9 @@ int qemu_iovec_init_extended( + void *tail_buf, size_t tail_len); + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); ++struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov); + int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); + void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); + void qemu_iovec_concat(QEMUIOVector *dst, +diff --git a/util/iov.c b/util/iov.c +index b4be580022..65a70449da 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -378,15 +378,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, + } + + /* +- * qiov_slice ++ * qemu_iovec_slice + * + * Find subarray of iovec's, containing requested range. @head would + * be offset in first iov (returned by the function), @tail would be + * count of extra bytes in last iovec (returned iov + @niov - 1).
+ */ +-static struct iovec *qiov_slice(QEMUIOVector *qiov, +- size_t offset, size_t len, +- size_t *head, size_t *tail, int *niov) ++struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov) + { + struct iovec *iov, *end_iov; + +@@ -411,7 +411,7 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) + size_t head, tail; + int niov; + +- qiov_slice(qiov, offset, len, &head, &tail, &niov); ++ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov); + + return niov; + } +@@ -439,8 +439,8 @@ int qemu_iovec_init_extended( + } + + if (mid_len) { +- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, +- &mid_head, &mid_tail, &mid_niov); ++ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, ++ &mid_head, &mid_tail, &mid_niov); + } + + total_niov = !!head_len + mid_niov + !!tail_len; +-- +2.39.3 + diff --git a/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch b/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch new file mode 100644 index 0000000..fd21880 --- /dev/null +++ b/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch @@ -0,0 +1,156 @@ +From 8ff973985a04fec1a3cdf886976a03e0dca7b0ea Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:17 +0200 +Subject: [PATCH 3/9] util/iov: Remove qemu_iovec_init_extended() + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/5] 1740d7b15ea4fbfbe71e7adc122741e85e83fb8c (hreitz/qemu-kvm-c-9-s) + +bdrv_pad_request() was the main user of qemu_iovec_init_extended(). +HEAD^ has removed that use, so we can remove qemu_iovec_init_extended() +now. + +The only remaining user is qemu_iovec_init_slice(), which can easily +inline the small part it really needs. + +Note that qemu_iovec_init_extended() offered a memcpy() optimization to +initialize the new I/O vector. 
qemu_iovec_concat_iov(), which is used +to replace its functionality, does not, but calls qemu_iovec_add() for +every single element. If we decide this optimization was important, we +will need to re-implement it in qemu_iovec_concat_iov(), which might +also benefit its pre-existing users. + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-4-hreitz@redhat.com> +(cherry picked from commit cc63f6f6fa1aaa4b6405dd69432c693e9c8d18ca) +Signed-off-by: Hanna Czenczek +--- + include/qemu/iov.h | 5 --- + util/iov.c | 79 +++++++--------------------------------------- + 2 files changed, 11 insertions(+), 73 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 46fadfb27a..63a1c01965 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -222,11 +222,6 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) + + void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); + void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); +-int qemu_iovec_init_extended( +- QEMUIOVector *qiov, +- void *head_buf, size_t head_len, +- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, +- void *tail_buf, size_t tail_len); + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); + struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, +diff --git a/util/iov.c b/util/iov.c +index 65a70449da..866fb577f3 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -416,70 +416,6 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) + return niov; + } + +-/* +- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, +- * and @tail_buf buffer into new qiov. 
+- */ +-int qemu_iovec_init_extended( +- QEMUIOVector *qiov, +- void *head_buf, size_t head_len, +- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, +- void *tail_buf, size_t tail_len) +-{ +- size_t mid_head, mid_tail; +- int total_niov, mid_niov = 0; +- struct iovec *p, *mid_iov = NULL; +- +- assert(mid_qiov->niov <= IOV_MAX); +- +- if (SIZE_MAX - head_len < mid_len || +- SIZE_MAX - head_len - mid_len < tail_len) +- { +- return -EINVAL; +- } +- +- if (mid_len) { +- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, +- &mid_head, &mid_tail, &mid_niov); +- } +- +- total_niov = !!head_len + mid_niov + !!tail_len; +- if (total_niov > IOV_MAX) { +- return -EINVAL; +- } +- +- if (total_niov == 1) { +- qemu_iovec_init_buf(qiov, NULL, 0); +- p = &qiov->local_iov; +- } else { +- qiov->niov = qiov->nalloc = total_niov; +- qiov->size = head_len + mid_len + tail_len; +- p = qiov->iov = g_new(struct iovec, qiov->niov); +- } +- +- if (head_len) { +- p->iov_base = head_buf; +- p->iov_len = head_len; +- p++; +- } +- +- assert(!mid_niov == !mid_len); +- if (mid_niov) { +- memcpy(p, mid_iov, mid_niov * sizeof(*p)); +- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; +- p[0].iov_len -= mid_head; +- p[mid_niov - 1].iov_len -= mid_tail; +- p += mid_niov; +- } +- +- if (tail_len) { +- p->iov_base = tail_buf; +- p->iov_len = tail_len; +- } +- +- return 0; +-} +- + /* + * Check if the contents of subrange of qiov data is all zeroes. 
+ */ +@@ -511,14 +447,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len) + { +- int ret; ++ struct iovec *slice_iov; ++ int slice_niov; ++ size_t slice_head, slice_tail; + + assert(source->size >= len); + assert(source->size - len >= offset); + +- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */ +- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); +- assert(ret == 0); ++ slice_iov = qemu_iovec_slice(source, offset, len, ++ &slice_head, &slice_tail, &slice_niov); ++ if (slice_niov == 1) { ++ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len); ++ } else { ++ qemu_iovec_init(qiov, slice_niov); ++ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len); ++ } + } + + void qemu_iovec_destroy(QEMUIOVector *qiov) +-- +2.39.3 + diff --git a/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch b/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch new file mode 100644 index 0000000..b0e66f6 --- /dev/null +++ b/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch @@ -0,0 +1,95 @@ +From 439a8cdd010dfd253fc2277ae4ec605b5ba621d9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:36 -0400 +Subject: [PATCH 02/56] util/mmap-alloc: qemu_fd_getfs() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [1/50] 8970b5ae611a933d693e0c90cbf4eda073635494 (peterx/qemu-kvm) + +This new helper fetches file system type for a fd. Only Linux is +implemented so far. Currently only tmpfs and hugetlbfs are defined, +but it can grow as needed. 
+ +Signed-off-by: Peter Xu +Reviewed-by: David Hildenbrand +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit fa45f8dab9613993c042176ea2d25552bfebc955) +Signed-off-by: Peter Xu +--- + include/qemu/mmap-alloc.h | 7 +++++++ + util/mmap-alloc.c | 28 ++++++++++++++++++++++++++++ + 2 files changed, 35 insertions(+) + +diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h +index 2825e231a7..8344daaa03 100644 +--- a/include/qemu/mmap-alloc.h ++++ b/include/qemu/mmap-alloc.h +@@ -1,8 +1,15 @@ + #ifndef QEMU_MMAP_ALLOC_H + #define QEMU_MMAP_ALLOC_H + ++typedef enum { ++ QEMU_FS_TYPE_UNKNOWN = 0, ++ QEMU_FS_TYPE_TMPFS, ++ QEMU_FS_TYPE_HUGETLBFS, ++ QEMU_FS_TYPE_NUM, ++} QemuFsType; + + size_t qemu_fd_getpagesize(int fd); ++QemuFsType qemu_fd_getfs(int fd); + + /** + * qemu_ram_mmap: mmap anonymous memory, the specified file or device. +diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c +index 5ed7d29183..ed14f9c64d 100644 +--- a/util/mmap-alloc.c ++++ b/util/mmap-alloc.c +@@ -27,8 +27,36 @@ + + #ifdef CONFIG_LINUX + #include ++#include + #endif + ++QemuFsType qemu_fd_getfs(int fd) ++{ ++#ifdef CONFIG_LINUX ++ struct statfs fs; ++ int ret; ++ ++ if (fd < 0) { ++ return QEMU_FS_TYPE_UNKNOWN; ++ } ++ ++ do { ++ ret = fstatfs(fd, &fs); ++ } while (ret != 0 && errno == EINTR); ++ ++ switch (fs.f_type) { ++ case TMPFS_MAGIC: ++ return QEMU_FS_TYPE_TMPFS; ++ case HUGETLBFS_MAGIC: ++ return QEMU_FS_TYPE_HUGETLBFS; ++ default: ++ return QEMU_FS_TYPE_UNKNOWN; ++ } ++#else ++ return QEMU_FS_TYPE_UNKNOWN; ++#endif ++} ++ + size_t qemu_fd_getpagesize(int fd) + { + #ifdef CONFIG_LINUX +-- +2.39.1 + diff --git a/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch b/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch deleted file mode 100644 index 5a5f90c..0000000 --- a/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 80445fed73a7d1a87e8ce96f6cb7d505e437f845 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 1 
Feb 2023 16:10:54 -0500 -Subject: [PATCH 4/8] util/userfaultfd: Add uffd_open() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] 4c81696314ab26db47c3415fa2c2501c6a572b5c (peterx/qemu-kvm) - -Add a helper to create the uffd handle. - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Juan Quintela -Signed-off-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit d5890ea0722831eea76a0efd23a496b3e8815fe8) -Signed-off-by: Peter Xu ---- - include/qemu/userfaultfd.h | 12 ++++++++++++ - migration/postcopy-ram.c | 11 +++++------ - tests/qtest/migration-test.c | 4 ++-- - util/userfaultfd.c | 13 +++++++++++-- - 4 files changed, 30 insertions(+), 10 deletions(-) - -diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h -index 6b74f92792..d764496f0b 100644 ---- a/include/qemu/userfaultfd.h -+++ b/include/qemu/userfaultfd.h -@@ -13,10 +13,20 @@ - #ifndef USERFAULTFD_H - #define USERFAULTFD_H - -+#ifdef CONFIG_LINUX -+ - #include "qemu/osdep.h" - #include "exec/hwaddr.h" - #include - -+/** -+ * uffd_open(): Open an userfaultfd handle for current context. -+ * -+ * @flags: The flags we want to pass in when creating the handle. -+ * -+ * Returns: the uffd handle if >=0, or <0 if error happens. 
-+ */ -+int uffd_open(int flags); - int uffd_query_features(uint64_t *features); - int uffd_create_fd(uint64_t features, bool non_blocking); - void uffd_close_fd(int uffd_fd); -@@ -32,4 +42,6 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); - int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); - bool uffd_poll_events(int uffd_fd, int tmo); - -+#endif /* CONFIG_LINUX */ -+ - #endif /* USERFAULTFD_H */ -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index b9a37ef255..0c55df0e52 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -37,6 +37,7 @@ - #include "qemu-file.h" - #include "yank_functions.h" - #include "tls.h" -+#include "qemu/userfaultfd.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -@@ -226,11 +227,9 @@ static bool receive_ufd_features(uint64_t *features) - int ufd; - bool ret = true; - -- /* if we are here __NR_userfaultfd should exists */ -- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { -- error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, -- strerror(errno)); -+ error_report("%s: uffd_open() failed: %s", __func__, strerror(errno)); - return false; - } - -@@ -375,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - goto out; - } - -- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { - error_report("%s: userfaultfd not available: %s", __func__, - strerror(errno)); -@@ -1160,7 +1159,7 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) - int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - { - /* Open the fd for the kernel to give us userfaults */ -- mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); -+ mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK); - if (mis->userfault_fd == -1) { - error_report("%s: Failed to open userfault fd: %s", __func__, - 
strerror(errno)); -diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c -index dbde726adf..0100e1bdbc 100644 ---- a/tests/qtest/migration-test.c -+++ b/tests/qtest/migration-test.c -@@ -61,14 +61,14 @@ static bool uffd_feature_thread_id; - #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) - #include - #include --#include -+#include "qemu/userfaultfd.h" - - static bool ufd_version_check(void) - { - struct uffdio_api api_struct; - uint64_t ioctl_mask; - -- int ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ int ufd = uffd_open(O_CLOEXEC); - - if (ufd == -1) { - g_test_message("Skipping test: userfaultfd not available"); -diff --git a/util/userfaultfd.c b/util/userfaultfd.c -index f1cd6af2b1..4953b3137d 100644 ---- a/util/userfaultfd.c -+++ b/util/userfaultfd.c -@@ -19,6 +19,15 @@ - #include - #include - -+int uffd_open(int flags) -+{ -+#if defined(__NR_userfaultfd) -+ return syscall(__NR_userfaultfd, flags); -+#else -+ return -EINVAL; -+#endif -+} -+ - /** - * uffd_query_features: query UFFD features - * -@@ -32,7 +41,7 @@ int uffd_query_features(uint64_t *features) - struct uffdio_api api_struct = { 0 }; - int ret = -1; - -- uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ uffd_fd = uffd_open(O_CLOEXEC); - if (uffd_fd < 0) { - trace_uffd_query_features_nosys(errno); - return -1; -@@ -69,7 +78,7 @@ int uffd_create_fd(uint64_t features, bool non_blocking) - uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); - - flags = O_CLOEXEC | (non_blocking ? 
O_NONBLOCK : 0); -- uffd_fd = syscall(__NR_userfaultfd, flags); -+ uffd_fd = uffd_open(flags); - if (uffd_fd < 0) { - trace_uffd_create_fd_nosys(errno); - return -1; --- -2.31.1 - diff --git a/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch b/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch deleted file mode 100644 index b0a22eb..0000000 --- a/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch +++ /dev/null @@ -1,94 +0,0 @@ -From a91da7741464dadeb306a741b4fb562e49ffea57 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 7 Feb 2023 15:57:11 -0500 -Subject: [PATCH 5/8] util/userfaultfd: Support /dev/userfaultfd - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 5f427d8c18c210ff8f66724c9e358a7120619e69 (peterx/qemu-kvm) - -Teach QEMU to use /dev/userfaultfd when it existed and fallback to the -system call if either it's not there or doesn't have enough permission. - -Firstly, as long as the app has permission to access /dev/userfaultfd, it -always have the ability to trap kernel faults which QEMU mostly wants. -Meanwhile, in some context (e.g. containers) the userfaultfd syscall can be -forbidden, so it can be the major way to use postcopy in a restricted -environment with strict seccomp setup. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit c40c0463413b941c13fe5f99a90c02d7d6584828) -Signed-off-by: Peter Xu ---- - util/trace-events | 1 + - util/userfaultfd.c | 32 ++++++++++++++++++++++++++++++++ - 2 files changed, 33 insertions(+) - -diff --git a/util/trace-events b/util/trace-events -index c8f53d7d9f..16f78d8fe5 100644 ---- a/util/trace-events -+++ b/util/trace-events -@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz - qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p" - - #userfaultfd.c -+uffd_detect_open_mode(int mode) "%d" - uffd_query_features_nosys(int err) "errno: %i" - uffd_query_features_api_failed(int err) "errno: %i" - uffd_create_fd_nosys(int err) "errno: %i" -diff --git a/util/userfaultfd.c b/util/userfaultfd.c -index 4953b3137d..fdff4867e8 100644 ---- a/util/userfaultfd.c -+++ b/util/userfaultfd.c -@@ -18,10 +18,42 @@ - #include - #include - #include -+#include -+ -+typedef enum { -+ UFFD_UNINITIALIZED = 0, -+ UFFD_USE_DEV_PATH, -+ UFFD_USE_SYSCALL, -+} uffd_open_mode; - - int uffd_open(int flags) - { - #if defined(__NR_userfaultfd) -+ static uffd_open_mode open_mode; -+ static int uffd_dev; -+ -+ /* Detect how to generate uffd desc when run the 1st time */ -+ if (open_mode == UFFD_UNINITIALIZED) { -+ /* -+ * Make /dev/userfaultfd the default approach because it has better -+ * permission controls, meanwhile allows kernel faults without any -+ * privilege requirement (e.g. SYS_CAP_PTRACE). 
-+ */ -+ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); -+ if (uffd_dev >= 0) { -+ open_mode = UFFD_USE_DEV_PATH; -+ } else { -+ /* Fallback to the system call */ -+ open_mode = UFFD_USE_SYSCALL; -+ } -+ trace_uffd_detect_open_mode(open_mode); -+ } -+ -+ if (open_mode == UFFD_USE_DEV_PATH) { -+ assert(uffd_dev >= 0); -+ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags); -+ } -+ - return syscall(__NR_userfaultfd, flags); - #else - return -EINVAL; --- -2.31.1 - diff --git a/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch b/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch new file mode 100644 index 0000000..4e492d9 --- /dev/null +++ b/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch @@ -0,0 +1,82 @@ +From fb2d40cc84f689e46138a81c57ccd1f234dbbb7c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 07/37] util/vfio-helpers: Use g_file_read_link() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/28] 3545a07c967782dba8dd081415232f91d3f600a9 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit dbdea0dbfe2c +Author: Akihiko Odaki +Date: Tue May 23 11:39:12 2023 +0900 + + util/vfio-helpers: Use g_file_read_link() + + When _FORTIFY_SOURCE=2, glibc version is 2.35, and GCC version is + 12.1.0, the compiler complains as follows: + + In file included from /usr/include/features.h:490, + from /usr/include/bits/libc-header-start.h:33, + from /usr/include/stdint.h:26, + from /usr/lib/gcc/aarch64-unknown-linux-gnu/12.1.0/include/stdint.h:9, + from /home/alarm/q/var/qemu/include/qemu/osdep.h:94, + from ../util/vfio-helpers.c:13: + In function 'readlink', + inlined from 'sysfs_find_group_file' at ../util/vfio-helpers.c:116:9, + 
inlined from 'qemu_vfio_init_pci' at ../util/vfio-helpers.c:326:18, + inlined from 'qemu_vfio_open_pci' at ../util/vfio-helpers.c:517:9: + /usr/include/bits/unistd.h:119:10: error: argument 2 is null but the corresponding size argument 3 value is 4095 [-Werror=nonnull] + 119 | return __glibc_fortify (readlink, __len, sizeof (char), + | ^~~~~~~~~~~~~~~ + + This error implies the allocated buffer can be NULL. Use + g_file_read_link(), which allocates buffer automatically to avoid the + error. + + Signed-off-by: Akihiko Odaki + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + util/vfio-helpers.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c +index 2d8af38f88..f8bab46c68 100644 +--- a/util/vfio-helpers.c ++++ b/util/vfio-helpers.c +@@ -106,15 +106,17 @@ struct QEMUVFIOState { + */ + static char *sysfs_find_group_file(const char *device, Error **errp) + { ++ g_autoptr(GError) gerr = NULL; + char *sysfs_link; + char *sysfs_group; + char *p; + char *path = NULL; + + sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device); +- sysfs_group = g_malloc0(PATH_MAX); +- if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) { +- error_setg_errno(errp, errno, "Failed to find iommu group sysfs path"); ++ sysfs_group = g_file_read_link(sysfs_link, &gerr); ++ if (gerr) { ++ error_setg(errp, "Failed to find iommu group sysfs path: %s", ++ gerr->message); + goto out; + } + p = strrchr(sysfs_group, '/'); +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch b/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch deleted file mode 100644 index a56c6eb..0000000 --- a/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch +++ /dev/null @@ -1,221 +0,0 @@ -From d0e7f24a8d941ab142f2a1973ae18ed1bfdc074f Mon Sep 17 00:00:00 2001 
-From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:41 +0100 -Subject: [PATCH 09/14] vdpa: add asid parameter to vhost_vdpa_dma_map/unmap -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/13] 3e7f89e57f73661017ccf0206f2ea77a72ca46bb (eperezmartin/qemu-kvm) - -So the caller can choose which ASID is destined. - -No need to update the batch functions as they will always be called from -memory listener updates at the moment. Memory listener updates will -always update ASID 0, as it's the passthrough ASID. - -All vhost devices's ASID are 0 at this moment. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-10-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit cd831ed5c4add8ed6ee980c3645b241cbef5130f) ---- - hw/virtio/trace-events | 4 ++-- - hw/virtio/vhost-vdpa.c | 36 +++++++++++++++++++++++----------- - include/hw/virtio/vhost-vdpa.h | 14 ++++++++++--- - net/vhost-vdpa.c | 6 +++--- - 4 files changed, 41 insertions(+), 19 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 46f2faf04e..a87c5f39a2 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -30,8 +30,8 @@ vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" - vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" - - # vhost-vdpa.c --vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 --vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint8_t 
type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 -+vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 -+vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 - vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index dd2768634b..0ecf2bbaa0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -72,22 +72,28 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, - return false; - } - --int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -- void *vaddr, bool readonly) -+/* -+ * The caller must set asid = 0 if the device does not support asid. -+ * This is not an ABI break since it is set to 0 by the initializer anyway. -+ */ -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size, void *vaddr, bool readonly) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; - int ret = 0; - - msg.type = v->msg_type; -+ msg.asid = asid; - msg.iotlb.iova = iova; - msg.iotlb.size = size; - msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; - msg.iotlb.perm = readonly ? 
VHOST_ACCESS_RO : VHOST_ACCESS_RW; - msg.iotlb.type = VHOST_IOTLB_UPDATE; - -- trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size, -- msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); -+ trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, -+ msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, -+ msg.iotlb.type); - - if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { - error_report("failed to write, fd=%d, errno=%d (%s)", -@@ -98,18 +104,24 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, - return ret; - } - --int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) -+/* -+ * The caller must set asid = 0 if the device does not support asid. -+ * This is not an ABI break since it is set to 0 by the initializer anyway. -+ */ -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; - int ret = 0; - - msg.type = v->msg_type; -+ msg.asid = asid; - msg.iotlb.iova = iova; - msg.iotlb.size = size; - msg.iotlb.type = VHOST_IOTLB_INVALIDATE; - -- trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova, -+ trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, - msg.iotlb.size, msg.iotlb.type); - - if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { -@@ -229,8 +241,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - } - - vhost_vdpa_iotlb_batch_begin_once(v); -- ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), -- vaddr, section->readonly); -+ ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, -+ int128_get64(llsize), vaddr, section->readonly); - if (ret) { - error_report("vhost vdpa map fail!"); - goto fail_map; -@@ -303,7 +315,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - vhost_iova_tree_remove(v->iova_tree, *result); - } - vhost_vdpa_iotlb_batch_begin_once(v); -- ret = vhost_vdpa_dma_unmap(v, iova, 
int128_get64(llsize)); -+ ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, -+ int128_get64(llsize)); - if (ret) { - error_report("vhost_vdpa dma unmap error!"); - } -@@ -876,7 +889,7 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) - } - - size = ROUND_UP(result->size, qemu_real_host_page_size()); -- r = vhost_vdpa_dma_unmap(v, result->iova, size); -+ r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size); - if (unlikely(r < 0)) { - error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); - return; -@@ -916,7 +929,8 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, - return false; - } - -- r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1, -+ r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova, -+ needle->size + 1, - (void *)(uintptr_t)needle->translated_addr, - needle->perm == IOMMU_RO); - if (unlikely(r != 0)) { -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index 1111d85643..e57dfa1fd1 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -19,6 +19,12 @@ - #include "hw/virtio/virtio.h" - #include "standard-headers/linux/vhost_types.h" - -+/* -+ * ASID dedicated to map guest's addresses. If SVQ is disabled it maps GPA to -+ * qemu's IOVA. 
If SVQ is enabled it maps also the SVQ vring here -+ */ -+#define VHOST_VDPA_GUEST_PA_ASID 0 -+ - typedef struct VhostVDPAHostNotifier { - MemoryRegion mr; - void *addr; -@@ -29,6 +35,7 @@ typedef struct vhost_vdpa { - int index; - uint32_t msg_type; - bool iotlb_batch_begin_sent; -+ uint32_t address_space_id; - MemoryListener listener; - struct vhost_vdpa_iova_range iova_range; - uint64_t acked_features; -@@ -42,8 +49,9 @@ typedef struct vhost_vdpa { - VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - } VhostVDPA; - --int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -- void *vaddr, bool readonly); --int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size, void *vaddr, bool readonly); -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size); - - #endif -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 85aa0da39a..c2f319eb88 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -258,7 +258,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - return; - } - -- r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); -+ r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1); - if (unlikely(r != 0)) { - error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); - } -@@ -298,8 +298,8 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, - return r; - } - -- r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, -- !write); -+ r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova, -+ vhost_vdpa_net_cvq_cmd_page_len(), buf, !write); - if (unlikely(r < 0)) { - goto dma_map_err; - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch b/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch deleted file mode 100644 index 57c38d1..0000000 --- 
a/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 6282a83619f274ca45a52d61577c10a05a0714dc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:43 +0100 -Subject: [PATCH 11/14] vdpa: add shadow_data to vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/13] 9d317add1318b555ba06e19e4c67849069e047b9 (eperezmartin/qemu-kvm) - -The memory listener that thells the device how to convert GPA to qemu's -va is registered against CVQ vhost_vdpa. memory listener translations -are always ASID 0, CVQ ones are ASID 1 if supported. - -Let's tell the listener if it needs to register them on iova tree or -not. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-12-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 6188d78a19894ac8f2bf9484d48a5235a529d3b7) ---- - hw/virtio/vhost-vdpa.c | 6 +++--- - include/hw/virtio/vhost-vdpa.h | 2 ++ - net/vhost-vdpa.c | 1 + - 3 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 0ecf2bbaa0..dc3498e995 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -224,7 +224,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - vaddr, section->readonly); - - llsize = int128_sub(llend, int128_make64(iova)); -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - int r; - - mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, -@@ -251,7 +251,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - return; - - fail_map: -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - vhost_iova_tree_remove(v->iova_tree, mem_region); - } - -@@ -296,7 +296,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - - llsize = int128_sub(llend, int128_make64(iova)); - -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - const DMAMap *result; - const void *vaddr = memory_region_get_ram_ptr(section->mr) + - section->offset_within_region + -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index e57dfa1fd1..45b969a311 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -40,6 +40,8 @@ typedef struct vhost_vdpa { - struct vhost_vdpa_iova_range iova_range; - uint64_t acked_features; - bool shadow_vqs_enabled; -+ /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ -+ bool shadow_data; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; - GPtrArray *shadow_vqs; -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1757f1d028..eea7a0df12 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -581,6 +581,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer, - s->always_svq = svq; - s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; -+ s->vhost_vdpa.shadow_data = svq; - s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch b/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch deleted file mode 100644 index c54a831..0000000 --- a/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 0f3a28e1e128754184c4af6a578f27e16c6a61d5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:37 +0100 -Subject: [PATCH 05/14] vdpa: add vhost_vdpa_net_valid_svq_features -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/13] 0b27e04f178ec73cb800f4fb05c17a92576142e4 (eperezmartin/qemu-kvm) - -It will be reused at vdpa device start so let's extract in its own -function. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-6-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 36e4647247f200b6fa4d2f656133f567036e8a85) ---- - net/vhost-vdpa.c | 26 +++++++++++++++++--------- - 1 file changed, 17 insertions(+), 9 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index b06540ac89..16a5ebe2dd 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -106,6 +106,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - return s->vhost_net; - } - -+static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) -+{ -+ uint64_t invalid_dev_features = -+ features & ~vdpa_svq_device_features & -+ /* Transport are all accepted at this point */ -+ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, -+ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); -+ -+ if (invalid_dev_features) { -+ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, -+ invalid_dev_features); -+ } -+ -+ return !invalid_dev_features; -+} -+ - static int vhost_vdpa_net_check_device_id(struct vhost_net *net) - { - uint32_t device_id; -@@ -684,15 +700,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (opts->x_svq) { - struct vhost_vdpa_iova_range iova_range; - -- uint64_t invalid_dev_features = -- features & ~vdpa_svq_device_features & -- /* Transport are all accepted at this point */ -- ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, -- VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); -- -- if (invalid_dev_features) { -- error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, -- invalid_dev_features); -+ if (!vhost_vdpa_net_valid_svq_features(features, errp)) { - goto err_svq; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch b/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch deleted file mode 100644 index 22c5955..0000000 --- a/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 72f296870805750df8dfe5eaad77dd7d435a8f41 Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:40 +0100 -Subject: [PATCH 08/14] vdpa: allocate SVQ array unconditionally -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/13] 08cd86d0859f82d768794e29241cfeff25df667c (eperezmartin/qemu-kvm) - -SVQ may run or not in a device depending on runtime conditions (for -example, if the device can move CVQ to its own group or not). - -Allocate the SVQ array unconditionally at startup, since its hard to -move this allocation elsewhere. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-9-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 273e0003f0005cc17292dedae01e5edb0064b69c) ---- - hw/virtio/vhost-vdpa.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 84218ce078..dd2768634b 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -532,10 +532,6 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev) - struct vhost_vdpa *v = dev->opaque; - size_t idx; - -- if (!v->shadow_vqs) { -- return; -- } -- - for (idx = 0; idx < v->shadow_vqs->len; ++idx) { - vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx)); - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch b/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch deleted file mode 100644 index 9b78b5c..0000000 --- a/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch +++ /dev/null @@ -1,193 +0,0 @@ -From 84c203faa570b85eec006215768c83371c9f0399 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:44 +0100 -Subject: [PATCH 12/14] vdpa: 
always start CVQ in SVQ mode if possible -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/13] 83f94b3e163ca38d08dbf7c111a4cfa7a44e3dc2 (eperezmartin/qemu-kvm) - -Isolate control virtqueue in its own group, allowing to intercept control -commands but letting dataplane run totally passthrough to the guest. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221215113144.322011-13-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang -(cherry picked from commit c1a1008685af0327d9d03f03d43bdb77e7af5bea) ---- - hw/virtio/vhost-vdpa.c | 3 +- - net/vhost-vdpa.c | 110 ++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 111 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index dc3498e995..72ff06673c 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -638,7 +638,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) - { - uint64_t features; - uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | -- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; -+ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | -+ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID; - int r; - - if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index eea7a0df12..07d33dae26 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -101,6 +101,8 @@ static const uint64_t vdpa_svq_device_features = - BIT_ULL(VIRTIO_NET_F_RSC_EXT) | - BIT_ULL(VIRTIO_NET_F_STANDBY); - -+#define VHOST_VDPA_NET_CVQ_ASID 1 -+ - VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -242,6 +244,40 @@ static NetClientInfo net_vhost_vdpa_info = { - 
.check_peer_type = vhost_vdpa_check_peer_type, - }; - -+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) -+{ -+ struct vhost_vring_state state = { -+ .index = vq_index, -+ }; -+ int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); -+ -+ if (unlikely(r < 0)) { -+ error_report("Cannot get VQ %u group: %s", vq_index, -+ g_strerror(errno)); -+ return r; -+ } -+ -+ return state.num; -+} -+ -+static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, -+ unsigned vq_group, -+ unsigned asid_num) -+{ -+ struct vhost_vring_state asid = { -+ .index = vq_group, -+ .num = asid_num, -+ }; -+ int r; -+ -+ r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); -+ if (unlikely(r < 0)) { -+ error_report("Can't set vq group %u asid %u, errno=%d (%s)", -+ asid.index, asid.num, errno, g_strerror(errno)); -+ } -+ return r; -+} -+ - static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - { - VhostIOVATree *tree = v->iova_tree; -@@ -316,11 +352,75 @@ dma_map_err: - static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { - VhostVDPAState *s; -- int r; -+ struct vhost_vdpa *v; -+ uint64_t backend_features; -+ int64_t cvq_group; -+ int cvq_index, r; - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - - s = DO_UPCAST(VhostVDPAState, nc, nc); -+ v = &s->vhost_vdpa; -+ -+ v->shadow_data = s->always_svq; -+ v->shadow_vqs_enabled = s->always_svq; -+ s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; -+ -+ if (s->always_svq) { -+ /* SVQ is already configured for all virtqueues */ -+ goto out; -+ } -+ -+ /* -+ * If we early return in these cases SVQ will not be enabled. The migration -+ * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. -+ * -+ * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev -+ * yet. 
-+ */ -+ r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -+ if (unlikely(r < 0)) { -+ error_report("Cannot get vdpa backend_features: %s(%d)", -+ g_strerror(errno), errno); -+ return -1; -+ } -+ if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || -+ !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ return 0; -+ } -+ -+ /* -+ * Check if all the virtqueues of the virtio device are in a different vq -+ * than the last vq. VQ group of last group passed in cvq_group. -+ */ -+ cvq_index = v->dev->vq_index_end - 1; -+ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); -+ if (unlikely(cvq_group < 0)) { -+ return cvq_group; -+ } -+ for (int i = 0; i < cvq_index; ++i) { -+ int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); -+ -+ if (unlikely(group < 0)) { -+ return group; -+ } -+ -+ if (group == cvq_group) { -+ return 0; -+ } -+ } -+ -+ r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ -+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -+ v->iova_range.last); -+ v->shadow_vqs_enabled = true; -+ s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; -+ -+out: - if (!s->vhost_vdpa.shadow_vqs_enabled) { - return 0; - } -@@ -349,6 +449,14 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - if (s->vhost_vdpa.shadow_vqs_enabled) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); -+ if (!s->always_svq) { -+ /* -+ * If only the CVQ is shadowed we can delete this safely. -+ * If all the VQs are shadows this will be needed by the time the -+ * device is started again to register SVQ vrings and similar. 
-+ */ -+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -+ } - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch b/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch new file mode 100644 index 0000000..56b9aed --- /dev/null +++ b/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch @@ -0,0 +1,61 @@ +From 74c2f378bdf278a03c02ae48948b00b4431a3fd6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 2 Jun 2023 16:38:52 +0200 +Subject: [PATCH 6/9] vdpa: do not block migration if device has cvq and + x-svq=on +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 190: vdpa: do not block migration if device has cvq and x-svq=on +RH-Jira: RHEL-573 +RH-Acked-by: Jason Wang +RH-Acked-by: Laurent Vivier +RH-Commit: [1/1] b0e2ec3c9e5c17252cf6a043fe1374ddc3c37de7 (eperezmartin/qemu-kvm) + +It was a mistake to forbid in all cases, as SVQ is already able to send +all the CVQ messages before start forwarding data vqs. It actually +caused a regression, making impossible to migrate device previously +migratable. + +Fixes: 36e4647247f2 ("vdpa: add vhost_vdpa_net_valid_svq_features") +Signed-off-by: Eugenio Pérez +Message-Id: <20230602143854.1879091-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Tested-by: Lei Yang +(cherry picked from commit 8bc0049eadafb984d305c847cedff550b58e5fc0) +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 8c8900f0f4..1ae839da34 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -844,13 +844,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.shadow_vq_ops_opaque = s; + + /* +- * TODO: We cannot migrate devices with CVQ as there is no way to set +- * the device state (MAC, MQ, etc) before starting the datapath. ++ * TODO: We cannot migrate devices with CVQ and no x-svq enabled as ++ * there is no way to set the device state (MAC, MQ, etc) before ++ * starting the datapath. + * + * Migration blocker ownership now belongs to s->vhost_vdpa. + */ +- error_setg(&s->vhost_vdpa.migration_blocker, +- "net vdpa cannot migrate with CVQ feature"); ++ if (!svq) { ++ error_setg(&s->vhost_vdpa.migration_blocker, ++ "net vdpa cannot migrate with CVQ feature"); ++ } + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch b/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch deleted file mode 100644 index d800258..0000000 --- a/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch +++ /dev/null @@ -1,44 +0,0 @@ -From fbb177ad84d562a20e51e71c73257d2ef85be2d9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:15 +0100 -Subject: [PATCH 4/9] vdpa: do not handle VIRTIO_NET_F_GUEST_ANNOUNCE in - vhost-vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu 
-RH-Acked-by: Jason Wang -RH-Commit: [4/4] b3960a8b3e4ca569b1b1e6ceccf2051d8c4b1079 (eperezmartin/qemu-kvm) - -So qemu emulates it even in case the device does not support it. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221221115015.1400889-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 980003debddd18306ea2e1364b96598383c0e257) ---- - net/vhost-vdpa.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 52ef9cb3a2..b06540ac89 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -72,7 +72,6 @@ const int vdpa_feature_bits[] = { - VIRTIO_F_RING_RESET, - VIRTIO_NET_F_RSS, - VIRTIO_NET_F_HASH_REPORT, -- VIRTIO_NET_F_GUEST_ANNOUNCE, - VIRTIO_NET_F_STATUS, - VHOST_INVALID_FEATURE_BIT - }; --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch b/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch new file mode 100644 index 0000000..1ab8f02 --- /dev/null +++ b/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch @@ -0,0 +1,105 @@ +From 636eb63cbf23b31fc9880528490ac4bef680305b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 25 Jan 2023 08:47:34 +0100 +Subject: [PATCH 4/7] vdpa: export vhost_vdpa_set_vring_ready +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [4/7] 8d1fecec7a993b8b68e268e8783c200c158f5ee0 (eperezmartin/qemu-kvm) + +The vhost-vdpa net backend needs to enable vrings in a different order +than default, so export it. + +No functional change intended except for tracing, that now includes the +(virtio) index being enabled and the return value of the ioctl. 
+ +Still ignoring return value of this function if called from +vhost_vdpa_dev_start, as reorganize calling code around it is out of +the scope of this series. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +--- + hw/virtio/trace-events | 2 +- + hw/virtio/vhost-vdpa.c | 25 +++++++++++++------------ + include/hw/virtio/vhost-vdpa.h | 1 + + 3 files changed, 15 insertions(+), 13 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 300dec8d3e..85b43cd8fe 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -48,7 +48,7 @@ vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI + vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 + vhost_vdpa_reset_device(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 + vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" +-vhost_vdpa_set_vring_ready(void *dev) "dev: %p" ++vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d" + vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" + vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 + vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index c04f14420d..e4d0101327 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -733,18 +733,17 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) + return idx; + } + +-static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) ++int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) + { +- int i; +- trace_vhost_vdpa_set_vring_ready(dev); +- for (i = 0; i < dev->nvqs; ++i) { +- struct vhost_vring_state state = { +- .index = dev->vq_index + i, +- .num = 1, +- }; +- vhost_vdpa_call(dev, 
VHOST_VDPA_SET_VRING_ENABLE, &state); +- } +- return 0; ++ struct vhost_dev *dev = v->dev; ++ struct vhost_vring_state state = { ++ .index = idx, ++ .num = 1, ++ }; ++ int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); ++ ++ trace_vhost_vdpa_set_vring_ready(dev, idx, r); ++ return r; + } + + static int vhost_vdpa_set_config_call(struct vhost_dev *dev, +@@ -1155,7 +1154,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + if (unlikely(!ok)) { + return -1; + } +- vhost_vdpa_set_vring_ready(dev); ++ for (int i = 0; i < dev->nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(v, dev->vq_index + i); ++ } + } else { + vhost_vdpa_suspend(dev); + vhost_vdpa_svqs_stop(dev); +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index c278a2a8de..540642d304 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -55,6 +55,7 @@ typedef struct vhost_vdpa { + } VhostVDPA; + + int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); ++int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); + + int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, + hwaddr size, void *vaddr, bool readonly); +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch b/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch deleted file mode 100644 index bb55256..0000000 --- a/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 46e80a9350a02fdb5689638df96bc7389e953cf8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 17 Jan 2023 11:53:08 +0100 -Subject: [PATCH 13/14] vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: 
Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/13] b7fb4b8e9ea26b6664a9179ed0a88376acf5115f (eperezmartin/qemu-kvm) - -VHOST_BACKEND_F_IOTLB_ASID is the feature bit, not the bitmask. Since -the device under test also provided VHOST_BACKEND_F_IOTLB_MSG_V2 and -VHOST_BACKEND_F_IOTLB_BATCH, this went unnoticed. - -Fixes: c1a1008685 ("vdpa: always start CVQ in SVQ mode if possible") -Signed-off-by: Eugenio Pérez -Reviewed-by: Michael S. Tsirkin -Acked-by: Jason Wang -Signed-off-by: Jason Wang - -Upstream status: git@github.com:jasowang/qemu.git -(cherry picked from commit 2bd492bca521ee8594f1d5db8dc9aac126fc4f85) ---- - net/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 07d33dae26..7d9c4ea09d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -384,7 +384,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - g_strerror(errno), errno); - return -1; - } -- if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || -+ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { - return 0; - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch b/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch deleted file mode 100644 index ebb7f38..0000000 --- a/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch +++ /dev/null @@ -1,59 +0,0 @@ -From b71724e94c94acd6e09fed2b47be2901799c2353 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:14 +0100 -Subject: [PATCH 3/9] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in - vhost_vdpa_net_handle_ctrl_avail -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy 
Lu -RH-Acked-by: Jason Wang -RH-Commit: [3/4] c4ef5b62a5d41911565b8960a88bb48d746ff6c7 (eperezmartin/qemu-kvm) - -Since this capability is emulated by qemu shadowed CVQ cannot forward it -to the device. Process all that command within qemu. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221221115015.1400889-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang -(cherry picked from commit 3f9a3eeb7ca6acd899e2205a9118928b4cd94e47) ---- - net/vhost-vdpa.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 2b4b85d8f8..52ef9cb3a2 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -489,9 +489,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, - s->cvq_cmd_out_buffer, - vhost_vdpa_net_cvq_cmd_len()); -- dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -- if (unlikely(dev_written < 0)) { -- goto out; -+ if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) { -+ /* -+ * Guest announce capability is emulated by qemu, so don't forward to -+ * the device. 
-+ */ -+ dev_written = sizeof(status); -+ *s->status = VIRTIO_NET_OK; -+ } else { -+ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -+ if (unlikely(dev_written < 0)) { -+ goto out; -+ } - } - - if (unlikely(dev_written < sizeof(status))) { --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch b/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch deleted file mode 100644 index c577758..0000000 --- a/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 965f27235276e3b16ebf630436eb1d7e792a3d2a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 2 Jun 2023 16:38:54 +0200 -Subject: [PATCH 3/4] vdpa: map shadow vrings with MAP_SHARED -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC -RH-Jira: RHEL-1060 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 673ba501d6e76bae9272847acebaf5f01689f9cf - -JIRA: https://issues.redhat.com/browse/RHEL-1060 - -The vdpa devices that use va addresses neeeds these maps shared. -Otherwise, vhost_vdpa checks will refuse to accept the maps. - -The mmap call will always return a page aligned address, so removing the -qemu_memalign call. Keeping the ROUND_UP for the size as we still need -to DMA-map them in full. - -Not applying fixes tag as it never worked with va devices. - -Signed-off-by: Eugenio Pérez -Message-Id: <20230602143854.1879091-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit babf8b87127ae809b31b3c0a117dcbc91aaf9aba) - -Conflicts - - because of missing commits: - - 5d410557dea4 ("vhost: fix possible wrap in SVQ descriptor ring") - 5c1ebd4c432e ("vdpa: block migration if device has unsupported features") - - and already backported commit$ - - a0d7215e339b ("vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present") - -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-shadow-virtqueue.c | 18 +++++++++--------- - net/vhost-vdpa.c | 16 ++++++++-------- - 2 files changed, 17 insertions(+), 17 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 4307296358..9f09d435be 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -647,7 +647,7 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) - void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - VirtQueue *vq, VhostIOVATree *iova_tree) - { -- size_t desc_size, driver_size, device_size; -+ size_t desc_size; - - event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->next_guest_avail_elem = NULL; -@@ -659,14 +659,14 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - svq->iova_tree = iova_tree; - - svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); -- driver_size = vhost_svq_driver_area_size(svq); -- device_size = vhost_svq_device_area_size(svq); -- svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size); -+ svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), -+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -+ -1, 0); - desc_size = sizeof(vring_desc_t) * svq->vring.num; - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); -- memset(svq->vring.desc, 0, driver_size); -- svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); -- memset(svq->vring.used, 0, device_size); -+ 
svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), -+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -+ -1, 0); - svq->desc_state = g_new0(SVQDescState, svq->vring.num); - svq->desc_next = g_new0(uint16_t, svq->vring.num); - for (unsigned i = 0; i < svq->vring.num - 1; i++) { -@@ -705,8 +705,8 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - svq->vq = NULL; - g_free(svq->desc_next); - g_free(svq->desc_state); -- qemu_vfree(svq->vring.desc); -- qemu_vfree(svq->vring.used); -+ munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); -+ munmap(svq->vring.used, vhost_svq_device_area_size(svq)); - event_notifier_set_handler(&svq->hdev_call, NULL); - } - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index d282c90a3d..8bfa95b801 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -203,8 +203,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { - return; - } -- qemu_vfree(s->cvq_cmd_out_buffer); -- qemu_vfree(s->status); -+ munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len()); -+ munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len()); - if (s->vhost_net) { - vhost_net_cleanup(s->vhost_net); - g_free(s->vhost_net); -@@ -761,12 +761,12 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.shadow_data = svq; - if (!is_datapath) { -- s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), -- vhost_vdpa_net_cvq_cmd_page_len()); -- memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); -- s->status = qemu_memalign(qemu_real_host_page_size(), -- vhost_vdpa_net_cvq_cmd_page_len()); -- memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len()); -+ s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), -+ PROT_READ | PROT_WRITE, -+ MAP_SHARED | MAP_ANONYMOUS, -1, 0); -+ s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), -+ PROT_READ | PROT_WRITE, MAP_SHARED | 
MAP_ANONYMOUS, -+ -1, 0); - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch b/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch new file mode 100644 index 0000000..a37612c --- /dev/null +++ b/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch @@ -0,0 +1,286 @@ +From 1609e47511c9a02b26e0023ff6e1e999d7cdf179 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 26 May 2023 17:31:43 +0200 +Subject: [PATCH 2/7] vdpa: move CVQ isolation check to net_init_vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [2/7] caed8f81c3e30e6147817e7f43225aa3ee90ff37 (eperezmartin/qemu-kvm) + +Evaluating it at start time instead of initialization time may make the +guest capable of dynamically adding or removing migration blockers. + +Also, moving to initialization reduces the number of ioctls in the +migration, reducing failure possibilities. + +As a drawback we need to check for CVQ isolation twice: one time with no +MQ negotiated and another one acking it, as long as the device supports +it. This is because Vring ASID / group management is based on vq +indexes, but we don't know the index of CVQ before negotiating MQ. + +Signed-off-by: Eugenio Pérez +Message-Id: <20230526153143.470745-3-eperezma@redhat.com> +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +--- + net/vhost-vdpa.c | 155 ++++++++++++++++++++++++++++++++++------------- + 1 file changed, 112 insertions(+), 43 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 801d4e0422..ce17e4416a 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -43,6 +43,10 @@ typedef struct VhostVDPAState { + + /* The device always have SVQ enabled */ + bool always_svq; ++ ++ /* The device can isolate CVQ in its own ASID */ ++ bool cvq_isolated; ++ + bool started; + } VhostVDPAState; + +@@ -369,15 +373,8 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + +-/** +- * Get vring virtqueue group +- * +- * @device_fd vdpa device fd +- * @vq_index Virtqueue index +- * +- * Return -errno in case of error, or vq group if success. +- */ +-static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) ++static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, ++ Error **errp) + { + struct vhost_vring_state state = { + .index = vq_index, +@@ -386,8 +383,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) + + if (unlikely(r < 0)) { + r = -errno; +- error_report("Cannot get VQ %u group: %s", vq_index, +- g_strerror(errno)); ++ error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); + return r; + } + +@@ -487,9 +483,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + { + VhostVDPAState *s, *s0; + struct vhost_vdpa *v; +- uint64_t backend_features; + int64_t cvq_group; +- int cvq_index, r; ++ int r; ++ Error *err = NULL; + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + +@@ -509,41 +505,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + /* + * If we early return in these cases SVQ will not be enabled. The migration + * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. +- * +- * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev +- * yet. 
+ */ +- r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); +- if (unlikely(r < 0)) { +- error_report("Cannot get vdpa backend_features: %s(%d)", +- g_strerror(errno), errno); +- return -1; ++ if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { ++ return 0; + } +- if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || +- !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { ++ ++ if (!s->cvq_isolated) { + return 0; + } + +- /* +- * Check if all the virtqueues of the virtio device are in a different vq +- * than the last vq. VQ group of last group passed in cvq_group. +- */ +- cvq_index = v->dev->vq_index_end - 1; +- cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); ++ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, ++ v->dev->vq_index_end - 1, ++ &err); + if (unlikely(cvq_group < 0)) { ++ error_report_err(err); + return cvq_group; + } +- for (int i = 0; i < cvq_index; ++i) { +- int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); +- +- if (unlikely(group < 0)) { +- return group; +- } +- +- if (group == cvq_group) { +- return 0; +- } +- } + + r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); + if (unlikely(r < 0)) { +@@ -806,6 +783,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { + .avail_handler = vhost_vdpa_net_handle_ctrl_avail, + }; + ++/** ++ * Probe if CVQ is isolated ++ * ++ * @device_fd The vdpa device fd ++ * @features Features offered by the device. ++ * @cvq_index The control vq pair index ++ * ++ * Returns <0 in case of failure, 0 if false and 1 if true. 
++ */ ++static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, ++ int cvq_index, Error **errp) ++{ ++ uint64_t backend_features; ++ int64_t cvq_group; ++ uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | ++ VIRTIO_CONFIG_S_DRIVER | ++ VIRTIO_CONFIG_S_FEATURES_OK; ++ int r; ++ ++ ERRP_GUARD(); ++ ++ r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); ++ if (unlikely(r < 0)) { ++ error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); ++ return r; ++ } ++ ++ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { ++ return 0; ++ } ++ ++ r = ioctl(device_fd, VHOST_SET_FEATURES, &features); ++ if (unlikely(r)) { ++ error_setg_errno(errp, errno, "Cannot set features"); ++ } ++ ++ r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); ++ if (unlikely(r)) { ++ error_setg_errno(errp, -r, "Cannot set device features"); ++ goto out; ++ } ++ ++ cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); ++ if (unlikely(cvq_group < 0)) { ++ if (cvq_group != -ENOTSUP) { ++ r = cvq_group; ++ goto out; ++ } ++ ++ /* ++ * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend ++ * support ASID even if the parent driver does not. The CVQ cannot be ++ * isolated in this case. 
++ */ ++ error_free(*errp); ++ *errp = NULL; ++ r = 0; ++ goto out; ++ } ++ ++ for (int i = 0; i < cvq_index; ++i) { ++ int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); ++ if (unlikely(group < 0)) { ++ r = group; ++ goto out; ++ } ++ ++ if (group == (int64_t)cvq_group) { ++ r = 0; ++ goto out; ++ } ++ } ++ ++ r = 1; ++ ++out: ++ status = 0; ++ ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); ++ return r; ++} ++ + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + const char *device, + const char *name, +@@ -815,16 +873,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + bool is_datapath, + bool svq, + struct vhost_vdpa_iova_range iova_range, +- uint64_t features) ++ uint64_t features, ++ Error **errp) + { + NetClientState *nc = NULL; + VhostVDPAState *s; + int ret = 0; + assert(name); ++ int cvq_isolated; ++ + if (is_datapath) { + nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, + name); + } else { ++ cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, ++ queue_pair_index * 2, ++ errp); ++ if (unlikely(cvq_isolated < 0)) { ++ return NULL; ++ } ++ + nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, + device, name); + } +@@ -851,6 +919,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; ++ s->cvq_isolated = cvq_isolated; + + /* + * TODO: We cannot migrate devices with CVQ and no x-svq enabled as +@@ -982,7 +1051,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + for (i = 0; i < queue_pairs; i++) { + ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 2, true, opts->x_svq, +- iova_range, features); ++ iova_range, features, errp); + if (!ncs[i]) + goto err; + } +@@ -990,7 +1059,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + if (has_cvq) { + nc = net_vhost_vdpa_init(peer, 
TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 1, false, +- opts->x_svq, iova_range, features); ++ opts->x_svq, iova_range, features, errp); + if (!nc) + goto err; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch b/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch deleted file mode 100644 index 7cda847..0000000 --- a/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 63a45add7c9f7bb2b7775ae4cb2d7df22f7f2033 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:39 +0100 -Subject: [PATCH 07/14] vdpa: move SVQ vring features check to net/ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/13] a24189aea4dbde3ed4486f685d0d88aeee1a0ee7 (eperezmartin/qemu-kvm) - -The next patches will start control SVQ if possible. However, we don't -know if that will be possible at qemu boot anymore. - -Since the moved checks will be already evaluated at net/ to know if it -is ok to shadow CVQ, move them. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-8-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 258a03941fd23108a322d09abc9c55341e09688d) ---- - hw/virtio/vhost-vdpa.c | 32 ++------------------------------ - net/vhost-vdpa.c | 3 ++- - 2 files changed, 4 insertions(+), 31 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 9e7cbf1776..84218ce078 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -389,29 +389,9 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, - return ret; - } - --static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, -- Error **errp) -+static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) - { - g_autoptr(GPtrArray) shadow_vqs = NULL; -- uint64_t dev_features, svq_features; -- int r; -- bool ok; -- -- if (!v->shadow_vqs_enabled) { -- return 0; -- } -- -- r = vhost_vdpa_get_dev_features(hdev, &dev_features); -- if (r != 0) { -- error_setg_errno(errp, -r, "Can't get vdpa device features"); -- return r; -- } -- -- svq_features = dev_features; -- ok = vhost_svq_valid_features(svq_features, errp); -- if (unlikely(!ok)) { -- return -1; -- } - - shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); - for (unsigned n = 0; n < hdev->nvqs; ++n) { -@@ -422,7 +402,6 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - } - - v->shadow_vqs = g_steal_pointer(&shadow_vqs); -- return 0; - } - - static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) -@@ -447,10 +426,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - dev->opaque = opaque ; - v->listener = vhost_vdpa_memory_listener; - v->msg_type = VHOST_IOTLB_MSG_V2; -- ret = vhost_vdpa_init_svq(dev, v, errp); -- if (ret) { -- goto err; -- } -+ vhost_vdpa_init_svq(dev, v); - - if (!vhost_vdpa_first_dev(dev)) { - return 0; -@@ -460,10 +436,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - VIRTIO_CONFIG_S_DRIVER); - - return 0; -- --err: 
-- ram_block_discard_disable(false); -- return ret; - } - - static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 8d3ed095d0..85aa0da39a 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -117,9 +117,10 @@ static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) - if (invalid_dev_features) { - error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, - invalid_dev_features); -+ return false; - } - -- return !invalid_dev_features; -+ return vhost_svq_valid_features(features, errp); - } - - static int vhost_vdpa_net_check_device_id(struct vhost_net *net) --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch b/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch new file mode 100644 index 0000000..4ebd8bd --- /dev/null +++ b/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch @@ -0,0 +1,134 @@ +From 09bf0febef2512f00e71edca0fcbaf452652c2c7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 10 Aug 2023 11:27:28 +0200 +Subject: [PATCH 6/7] vdpa: move vhost_vdpa_set_vring_ready to the caller +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [6/7] cf4fd1071ca127914c8e8d6aefec451cad97ecc1 (eperezmartin/qemu-kvm) + +Doing that way allows CVQ to be enabled before the dataplane vqs, +restoring the state as MQ or MAC addresses properly in the case of a +migration. + +The patch does it by defining a ->load NetClientInfo callback also for +dataplane. Ideally, this should be done by an independent patch, but +the function is already static so it would only add an empty +vhost_vdpa_net_data_load stub. 
+ +Signed-off-by: Eugenio Pérez +--- +v3: +* Fix subject typo +* Expand patch message so it explains why +--- + hw/virtio/vdpa-dev.c | 3 +++ + hw/virtio/vhost-vdpa.c | 3 --- + net/vhost-vdpa.c | 41 +++++++++++++++++++++++++++++++---------- + 3 files changed, 34 insertions(+), 13 deletions(-) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 01b41eb0f1..8c47d643bf 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -256,6 +256,9 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) + error_setg_errno(errp, -ret, "Error starting vhost"); + goto err_guest_notifiers; + } ++ for (i = 0; i < s->dev.nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(&s->vdpa, i); ++ } + s->started = true; + + /* +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index e4d0101327..0d9d311abd 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1154,9 +1154,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + if (unlikely(!ok)) { + return -1; + } +- for (int i = 0; i < dev->nvqs; ++i) { +- vhost_vdpa_set_vring_ready(v, dev->vq_index + i); +- } + } else { + vhost_vdpa_suspend(dev); + vhost_vdpa_svqs_stop(dev); +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index a1b16bbc52..47b87bf80d 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -344,6 +344,22 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) + return 0; + } + ++static int vhost_vdpa_net_data_load(NetClientState *nc) ++{ ++ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ struct vhost_vdpa *v = &s->vhost_vdpa; ++ bool has_cvq = v->dev->vq_index_end % 2; ++ ++ if (has_cvq) { ++ return 0; ++ } ++ ++ for (int i = 0; i < v->dev->nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); ++ } ++ return 0; ++} ++ + static void vhost_vdpa_net_client_stop(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +@@ -366,6 +382,7 @@ static NetClientInfo net_vhost_vdpa_info = { + .size = 
sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, + .start = vhost_vdpa_net_data_start, ++ .load = vhost_vdpa_net_data_load, + .stop = vhost_vdpa_net_client_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, +@@ -682,18 +699,22 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + +- if (!v->shadow_vqs_enabled) { +- return 0; +- } ++ vhost_vdpa_set_vring_ready(v, v->dev->vq_index); + +- n = VIRTIO_NET(v->dev->vdev); +- r = vhost_vdpa_net_load_mac(s, n); +- if (unlikely(r < 0)) { +- return r; ++ if (v->shadow_vqs_enabled) { ++ n = VIRTIO_NET(v->dev->vdev); ++ r = vhost_vdpa_net_load_mac(s, n); ++ if (unlikely(r < 0)) { ++ return r; ++ } ++ r = vhost_vdpa_net_load_mq(s, n); ++ if (unlikely(r)) { ++ return r; ++ } + } +- r = vhost_vdpa_net_load_mq(s, n); +- if (unlikely(r)) { +- return r; ++ ++ for (int i = 0; i < v->dev->vq_index; ++i) { ++ vhost_vdpa_set_vring_ready(v, i); + } + + return 0; +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch b/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch deleted file mode 100644 index 7191628..0000000 --- a/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch +++ /dev/null @@ -1,268 +0,0 @@ -From 293e249644c14b2bd19dd6a3f08a0e18ec040200 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 3 Mar 2023 18:24:32 +0100 -Subject: [PATCH 1/4] vdpa net: move iova tree creation from init to start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC -RH-Jira: RHEL-1060 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] f85a05eb528820adf4a3c0cad2950b4ab500d5fe - -JIRA: https://issues.redhat.com/browse/RHEL-1060 - -Only create iova_tree if and when it is 
needed. - -The cleanup keeps being responsible for the last VQ but this change -allows it to merge both cleanup functions. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20230303172445.1089785-2-eperezma@redhat.com> -Tested-by: Lei Yang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 00ef422e9fbfef1fb40447b08826db0951d788dd) - -Conflicts - - because of missing commit - - bf7a2ad8b6df ("vdpa: harden the error path if get_iova_range failed") - -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 113 ++++++++++++++++++++++++++++++++++------------- - 1 file changed, 83 insertions(+), 30 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1b4fec59a2..a914348e2a 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -178,7 +178,6 @@ err_init: - static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -- struct vhost_dev *dev = &s->vhost_net->dev; - - /* - * If a peer NIC is attached, do not cleanup anything. 
-@@ -190,9 +189,6 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - } - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->status); -- if (dev->vq_index + dev->nvqs == dev->vq_index_end) { -- g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -- } - if (s->vhost_net) { - vhost_net_cleanup(s->vhost_net); - g_free(s->vhost_net); -@@ -242,10 +238,64 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, - return size; - } - -+/** From any vdpa net client, get the netclient of the first queue pair */ -+static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s) -+{ -+ NICState *nic = qemu_get_nic(s->nc.peer); -+ NetClientState *nc0 = qemu_get_peer(nic->ncs, 0); -+ -+ return DO_UPCAST(VhostVDPAState, nc, nc0); -+} -+ -+static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) -+{ -+ struct vhost_vdpa *v = &s->vhost_vdpa; -+ -+ if (v->shadow_vqs_enabled) { -+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -+ v->iova_range.last); -+ } -+} -+ -+static int vhost_vdpa_net_data_start(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_vdpa *v = &s->vhost_vdpa; -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ if (v->index == 0) { -+ vhost_vdpa_net_data_start_first(s); -+ return 0; -+ } -+ -+ if (v->shadow_vqs_enabled) { -+ VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s); -+ v->iova_tree = s0->vhost_vdpa.iova_tree; -+ } -+ -+ return 0; -+} -+ -+static void vhost_vdpa_net_client_stop(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_dev *dev; -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ dev = s->vhost_vdpa.dev; -+ if (dev->vq_index + dev->nvqs == dev->vq_index_end) { -+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -+ } -+} -+ - static NetClientInfo net_vhost_vdpa_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), - 
.receive = vhost_vdpa_receive, -+ .start = vhost_vdpa_net_data_start, -+ .stop = vhost_vdpa_net_client_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, - .has_ufo = vhost_vdpa_has_ufo, -@@ -359,7 +409,7 @@ dma_map_err: - - static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { -- VhostVDPAState *s; -+ VhostVDPAState *s, *s0; - struct vhost_vdpa *v; - uint64_t backend_features; - int64_t cvq_group; -@@ -423,8 +473,6 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - return r; - } - -- v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -- v->iova_range.last); - v->shadow_vqs_enabled = true; - s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; - -@@ -433,6 +481,27 @@ out: - return 0; - } - -+ s0 = vhost_vdpa_net_first_nc_vdpa(s); -+ if (s0->vhost_vdpa.iova_tree) { -+ /* -+ * SVQ is already configured for all virtqueues. Reuse IOVA tree for -+ * simplicity, whether CVQ shares ASID with guest or not, because: -+ * - Memory listener need access to guest's memory addresses allocated -+ * in the IOVA tree. -+ * - There should be plenty of IOVA address space for both ASID not to -+ * worry about collisions between them. Guest's translations are -+ * still validated with virtio virtqueue_pop so there is no risk for -+ * the guest to access memory that it shouldn't. -+ * -+ * To allocate a iova tree per ASID is doable but it complicates the -+ * code and it is not worth it for the moment. 
-+ */ -+ v->iova_tree = s0->vhost_vdpa.iova_tree; -+ } else { -+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -+ v->iova_range.last); -+ } -+ - r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, - vhost_vdpa_net_cvq_cmd_page_len(), false); - if (unlikely(r < 0)) { -@@ -457,15 +526,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - if (s->vhost_vdpa.shadow_vqs_enabled) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); -- if (!s->always_svq) { -- /* -- * If only the CVQ is shadowed we can delete this safely. -- * If all the VQs are shadows this will be needed by the time the -- * device is started again to register SVQ vrings and similar. -- */ -- g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -- } - } -+ -+ vhost_vdpa_net_client_stop(nc); - } - - static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, -@@ -675,8 +738,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - int nvqs, - bool is_datapath, - bool svq, -- struct vhost_vdpa_iova_range iova_range, -- VhostIOVATree *iova_tree) -+ struct vhost_vdpa_iova_range iova_range) - { - NetClientState *nc = NULL; - VhostVDPAState *s; -@@ -698,7 +760,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.shadow_data = svq; -- s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), - vhost_vdpa_net_cvq_cmd_page_len()); -@@ -776,7 +837,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - uint64_t features; - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; -- g_autoptr(VhostIOVATree) iova_tree = NULL; - struct vhost_vdpa_iova_range iova_range; - NetClientState *nc; - int queue_pairs, r, i = 0, has_cvq = 0; -@@ -822,12 +882,8 @@ int 
net_init_vhost_vdpa(const Netdev *netdev, const char *name, - } - - vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); -- if (opts->x_svq) { -- if (!vhost_vdpa_net_valid_svq_features(features, errp)) { -- goto err_svq; -- } -- -- iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); -+ if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) { -+ goto err; - } - - ncs = g_malloc0(sizeof(*ncs) * queue_pairs); -@@ -835,7 +891,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true, opts->x_svq, -- iova_range, iova_tree); -+ iova_range); - if (!ncs[i]) - goto err; - } -@@ -843,13 +899,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false, -- opts->x_svq, iova_range, iova_tree); -+ opts->x_svq, iova_range); - if (!nc) - goto err; - } - -- /* iova_tree ownership belongs to last NetClientState */ -- g_steal_pointer(&iova_tree); - return 0; - - err: -@@ -859,7 +913,6 @@ err: - } - } - --err_svq: - qemu_close(vdpa_device_fd); - - return -1; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch b/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch new file mode 100644 index 0000000..9388d75 --- /dev/null +++ b/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch @@ -0,0 +1,51 @@ +From 46d5b861a39b7d0d3222162e6b7707526c131230 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 24 Mar 2023 13:28:15 +0100 +Subject: [PATCH 7/7] vdpa: remove net cvq migration blocker +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [7/7] 
9542e305c7ea3a47e0f1fe0629281238b0bb2111 (eperezmartin/qemu-kvm) + +Now that we have add migration blockers if the device does not support +all the needed features, remove the general blocker applied to all net +devices with CVQ. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +--- + net/vhost-vdpa.c | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 47b87bf80d..6e03db4afa 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -941,18 +941,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; + s->cvq_isolated = cvq_isolated; +- +- /* +- * TODO: We cannot migrate devices with CVQ and no x-svq enabled as +- * there is no way to set the device state (MAC, MQ, etc) before +- * starting the datapath. +- * +- * Migration blocker ownership now belongs to s->vhost_vdpa. +- */ +- if (!svq) { +- error_setg(&s->vhost_vdpa.migration_blocker, +- "net vdpa cannot migrate with CVQ feature"); +- } + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch b/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch new file mode 100644 index 0000000..15dc410 --- /dev/null +++ b/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch @@ -0,0 +1,49 @@ +From db7ca7692e264e8bf1bd9e08e3de7a92fc76a363 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 9 Aug 2023 18:07:26 +0200 +Subject: [PATCH 5/7] vdpa: rename vhost_vdpa_net_load to + vhost_vdpa_net_cvq_load +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [5/7] 
aea91f3274786665725af892eb905818eb0f44f1 (eperezmartin/qemu-kvm) + +Next patches will add the corresponding data load. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +--- + net/vhost-vdpa.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 29d3fd3ca6..a1b16bbc52 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -673,7 +673,7 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, + return *s->status != VIRTIO_NET_OK; + } + +-static int vhost_vdpa_net_load(NetClientState *nc) ++static int vhost_vdpa_net_cvq_load(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + struct vhost_vdpa *v = &s->vhost_vdpa; +@@ -704,7 +704,7 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { + .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, + .start = vhost_vdpa_net_cvq_start, +- .load = vhost_vdpa_net_load, ++ .load = vhost_vdpa_net_cvq_load, + .stop = vhost_vdpa_net_cvq_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch b/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch deleted file mode 100644 index 57c3f6f..0000000 --- a/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch +++ /dev/null @@ -1,84 +0,0 @@ -From af109b3c7e8d7cb3b6c7c842a92ddf5de2270a3c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 2 Jun 2023 16:38:53 +0200 -Subject: [PATCH 2/4] vdpa: reorder vhost_vdpa_net_cvq_cmd_page_len function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC -RH-Jira: RHEL-1060 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] df45d2faa25186a246c18f24909ced67f94cf33f - -JIRA: 
https://issues.redhat.com/browse/RHEL-1060 - -We need to call it from resource cleanup context, as munmap needs the -size of the mappings. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20230602143854.1879091-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 915bf6ccd7a5c9b6cbea7a72f153597d1b98834f) -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 32 ++++++++++++++++---------------- - 1 file changed, 16 insertions(+), 16 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index a914348e2a..d282c90a3d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -110,6 +110,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - return s->vhost_net; - } - -+static size_t vhost_vdpa_net_cvq_cmd_len(void) -+{ -+ /* -+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. -+ * In buffer is always 1 byte, so it should fit here -+ */ -+ return sizeof(struct virtio_net_ctrl_hdr) + -+ 2 * sizeof(struct virtio_net_ctrl_mac) + -+ MAC_TABLE_ENTRIES * ETH_ALEN; -+} -+ -+static size_t vhost_vdpa_net_cvq_cmd_page_len(void) -+{ -+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); -+} -+ - static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) - { - uint64_t invalid_dev_features = -@@ -362,22 +378,6 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - vhost_iova_tree_remove(tree, *map); - } - --static size_t vhost_vdpa_net_cvq_cmd_len(void) --{ -- /* -- * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. 
-- * In buffer is always 1 byte, so it should fit here -- */ -- return sizeof(struct virtio_net_ctrl_hdr) + -- 2 * sizeof(struct virtio_net_ctrl_mac) + -- MAC_TABLE_ENTRIES * ETH_ALEN; --} -- --static size_t vhost_vdpa_net_cvq_cmd_page_len(void) --{ -- return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); --} -- - /** Map CVQ buffer. */ - static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, - bool write) --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-request-iova_range-only-once.patch b/SOURCES/kvm-vdpa-request-iova_range-only-once.patch deleted file mode 100644 index 041e8f7..0000000 --- a/SOURCES/kvm-vdpa-request-iova_range-only-once.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 760169d538a4e6ba61006f6796cd55af967a7f1e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:38 +0100 -Subject: [PATCH 06/14] vdpa: request iova_range only once -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/13] 2a8ae2f46ae88f01c5535038f38cb7895098b610 (eperezmartin/qemu-kvm) - -Currently iova range is requested once per queue pair in the case of -net. Reduce the number of ioctls asking it once at initialization and -reusing that value for each vhost_vdpa. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221215113144.322011-7-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Jason Wang -(cherry picked from commit a585fad26b2e6ccca156d9e65158ad1c5efd268d) ---- - hw/virtio/vhost-vdpa.c | 15 --------------- - net/vhost-vdpa.c | 27 ++++++++++++++------------- - 2 files changed, 14 insertions(+), 28 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e65603022f..9e7cbf1776 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -365,19 +365,6 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) - return 0; - } - --static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) --{ -- int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE, -- &v->iova_range); -- if (ret != 0) { -- v->iova_range.first = 0; -- v->iova_range.last = UINT64_MAX; -- } -- -- trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first, -- v->iova_range.last); --} -- - /* - * The use of this function is for requests that only need to be - * applied once. Typically such request occurs at the beginning -@@ -465,8 +452,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - goto err; - } - -- vhost_vdpa_get_iova_range(v); -- - if (!vhost_vdpa_first_dev(dev)) { - return 0; - } -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 16a5ebe2dd..8d3ed095d0 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -549,14 +549,15 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { - }; - - static NetClientState *net_vhost_vdpa_init(NetClientState *peer, -- const char *device, -- const char *name, -- int vdpa_device_fd, -- int queue_pair_index, -- int nvqs, -- bool is_datapath, -- bool svq, -- VhostIOVATree *iova_tree) -+ const char *device, -+ const char *name, -+ int vdpa_device_fd, -+ int queue_pair_index, -+ int nvqs, -+ bool is_datapath, -+ bool svq, -+ struct vhost_vdpa_iova_range iova_range, -+ VhostIOVATree *iova_tree) - { - NetClientState *nc = NULL; - VhostVDPAState *s; -@@ -575,6 +576,7 @@ static NetClientState 
*net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; - s->vhost_vdpa.shadow_vqs_enabled = svq; -+ s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), -@@ -654,6 +656,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; - g_autoptr(VhostIOVATree) iova_tree = NULL; -+ struct vhost_vdpa_iova_range iova_range; - NetClientState *nc; - int queue_pairs, r, i = 0, has_cvq = 0; - -@@ -697,14 +700,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - return queue_pairs; - } - -+ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); - if (opts->x_svq) { -- struct vhost_vdpa_iova_range iova_range; -- - if (!vhost_vdpa_net_valid_svq_features(features, errp)) { - goto err_svq; - } - -- vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); - iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); - } - -@@ -713,7 +714,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true, opts->x_svq, -- iova_tree); -+ iova_range, iova_tree); - if (!ncs[i]) - goto err; - } -@@ -721,7 +722,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false, -- opts->x_svq, iova_tree); -+ opts->x_svq, iova_range, iova_tree); - if (!nc) - goto err; - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch b/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch new file mode 100644 index 0000000..c8b4913 --- /dev/null +++ b/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch @@ 
-0,0 +1,67 @@ +From 09583f39d51d16079c9fda32545d7a44b6f5c8c6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 26 May 2023 17:31:42 +0200 +Subject: [PATCH 1/7] vdpa: return errno in vhost_vdpa_get_vring_group error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [1/7] 89745b1828a1af535c40657022d385250688d11d (eperezmartin/qemu-kvm) + +We need to tell in the caller, as some errors are expected in a normal +workflow. In particular, parent drivers in recent kernels with +VHOST_BACKEND_F_IOTLB_ASID may not support vring groups. In that case, +-ENOTSUP is returned. + +This is the case of vp_vdpa in Linux 6.2. + +Next patches in this series will use that information to know if it must +abort or not. Also, next patches return properly an errp instead of +printing with error_report. + +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Signed-off-by: Eugenio Pérez +Message-Id: <20230526153143.470745-2-eperezma@redhat.com> +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + net/vhost-vdpa.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1ae839da34..801d4e0422 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -369,6 +369,14 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++/** ++ * Get vring virtqueue group ++ * ++ * @device_fd vdpa device fd ++ * @vq_index Virtqueue index ++ * ++ * Return -errno in case of error, or vq group if success. 
++ */ + static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) + { + struct vhost_vring_state state = { +@@ -377,6 +385,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) + int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); + + if (unlikely(r < 0)) { ++ r = -errno; + error_report("Cannot get VQ %u group: %s", vq_index, + g_strerror(errno)); + return r; +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-stop-all-svq-on-device-deletion.patch b/SOURCES/kvm-vdpa-stop-all-svq-on-device-deletion.patch deleted file mode 100644 index e1e4c20..0000000 --- a/SOURCES/kvm-vdpa-stop-all-svq-on-device-deletion.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 757767330abb2c0a650c387a9a5965fee224beee Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 9 Feb 2023 18:00:04 +0100 -Subject: [PATCH] vdpa: stop all svq on device deletion -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 286: vdpa: stop all svq on device deletion -RH-Bugzilla: 2213864 -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Stefano Garzarella -RH-Commit: [1/1] b9ff402e4c6e386be3ea867df9358cdaa283cda7 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2213864 -Upstream-Status: merged - -Not stopping them leave the device in a bad state when virtio-net -fronted device is unplugged with device_del monitor command. - -This is not triggable in regular poweroff or qemu forces shutdown -because cleanup is called right after vhost_vdpa_dev_start(false). But -devices hot unplug does not call vdpa device cleanups. This lead to all -the vhost_vdpa devices without stop the SVQ but the last. - -Fix it and clean the code, making it symmetric with -vhost_vdpa_svqs_start. 
- -Fixes: dff4426fa656 ("vhost: Add Shadow VirtQueue kick forwarding capabilities") -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Message-Id: <20230209170004.899472-1-eperezma@redhat.com> -Tested-by: Laurent Vivier -Acked-by: Jason Wang -(cherry picked from commit 2e1a9de96b487cf818a22d681cad8d3f5d18dcca) ---- - hw/virtio/vhost-vdpa.c | 17 ++--------------- - 1 file changed, 2 insertions(+), 15 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 72ff06673c..46896b7592 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -669,26 +669,11 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev, - return ret; - } - --static void vhost_vdpa_reset_svq(struct vhost_vdpa *v) --{ -- if (!v->shadow_vqs_enabled) { -- return; -- } -- -- for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { -- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -- vhost_svq_stop(svq); -- } --} -- - static int vhost_vdpa_reset_device(struct vhost_dev *dev) - { -- struct vhost_vdpa *v = dev->opaque; - int ret; - uint8_t status = 0; - -- vhost_vdpa_reset_svq(v); -- - ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); - trace_vhost_vdpa_reset_device(dev, status); - return ret; -@@ -1080,6 +1065,8 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -+ -+ vhost_svq_stop(svq); - vhost_vdpa_svq_unmap_rings(dev, svq); - - event_notifier_cleanup(&svq->hdev_kick); --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch b/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch deleted file mode 100644 index 68c0c86..0000000 --- a/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 28163d7d61b6b0b8312b78d57dabc8f44bf39c46 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 
12:31:42 +0100 -Subject: [PATCH 10/14] vdpa: store x-svq parameter in VhostVDPAState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/13] 53f3b2698b4a5caca434f55e4300103a78778548 (eperezmartin/qemu-kvm) - -CVQ can be shadowed two ways: -- Device has x-svq=on parameter (current way) -- The device can isolate CVQ in its own vq group - -QEMU needs to check for the second condition dynamically, because CVQ -index is not known before the driver ack the features. Since this is -dynamic, the CVQ isolation could vary with different conditions, making -it possible to go from "not isolated group" to "isolated". - -Saving the cmdline parameter in an extra field so we never disable CVQ -SVQ in case the device was started with x-svq cmdline. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-11-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 7f211a28fd5482f76583988beecd8ee61588d45e) ---- - net/vhost-vdpa.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index c2f319eb88..1757f1d028 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -38,6 +38,8 @@ typedef struct VhostVDPAState { - void *cvq_cmd_out_buffer; - virtio_net_ctrl_ack *status; - -+ /* The device always have SVQ enabled */ -+ bool always_svq; - bool started; - } VhostVDPAState; - -@@ -576,6 +578,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; -+ s->always_svq = svq; - s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.iova_tree = iova_tree; --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch b/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch new file mode 100644 index 0000000..bfb1b8e --- /dev/null +++ b/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch @@ -0,0 +1,46 @@ +From 726662aee0bc295f6931b7aba1bd68f033e949aa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 10 Aug 2023 16:08:18 +0200 +Subject: [PATCH 3/7] vdpa: use first queue SVQ state for CVQ default +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 199: CVQ migration support +RH-Jira: RHEL-923 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Commit: [3/7] 5c98f11b5080552a62c8e37ff2c23339455b7b86 (eperezmartin/qemu-kvm) + +Previous to this patch the only way CVQ would be shadowed is if it does +support to isolate CVQ group or if all vqs were shadowed from the +beginning. The second condition was checked at the beginning, and no +more configuration was done. 
+ +After this series we need to check if data queues are shadowed because +they are in the middle of the migration. As checking if they are +shadowed already covers the previous case, let's just mimic it. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +--- + net/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index ce17e4416a..29d3fd3ca6 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -494,7 +494,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + + s0 = vhost_vdpa_net_first_nc_vdpa(s); + v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; +- v->shadow_vqs_enabled = s->always_svq; ++ v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; + s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; + + if (s->vhost_vdpa.shadow_data) { +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch b/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch deleted file mode 100644 index 3d11438..0000000 --- a/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch +++ /dev/null @@ -1,58 +0,0 @@ -From cb974f2f9a0c5b9520b6ac80bd1d1e4a6b12bbdc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:33 +0100 -Subject: [PATCH 01/14] vdpa: use v->shadow_vqs_enabled in - vhost_vdpa_svqs_start & stop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/13] f0db50a95f87dd011418617be7b80aa6813a1146 (eperezmartin/qemu-kvm) - -This function used to trust in v->shadow_vqs != NULL to know if it must -start svq or not. 
- -This is not going to be valid anymore, as qemu is going to allocate svq -array unconditionally (but it will only start them conditionally). - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 712c1a3171cf62d501dac5af58f77d5fea70350d) ---- - hw/virtio/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index c5be2645b0..44e6a9b7b3 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1036,7 +1036,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - Error *err = NULL; - unsigned i; - -- if (!v->shadow_vqs) { -+ if (!v->shadow_vqs_enabled) { - return true; - } - -@@ -1089,7 +1089,7 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - { - struct vhost_vdpa *v = dev->opaque; - -- if (!v->shadow_vqs) { -+ if (!v->shadow_vqs_enabled) { - return; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch b/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch new file mode 100644 index 0000000..1e00427 --- /dev/null +++ b/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch @@ -0,0 +1,72 @@ +From 97124d4f2afbc8e65a3ecf76096e6b34a9b71541 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 30/37] vfio: Fix null pointer dereference bug in + vfio_bars_finalize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [28/28] 4bbdf7f9c5595897244c6cc3d88d487dd5f99bf0 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + 
+commit 8af87a3ec7e4 +Author: Avihai Horon +Date: Tue Jul 4 16:39:27 2023 +0300 + + vfio: Fix null pointer dereference bug in vfio_bars_finalize() + + vfio_realize() has the following flow: + 1. vfio_bars_prepare() -- sets VFIOBAR->size. + 2. msix_early_setup(). + 3. vfio_bars_register() -- allocates VFIOBAR->mr. + + After vfio_bars_prepare() is called msix_early_setup() can fail. If it + does fail, vfio_bars_register() is never called and VFIOBAR->mr is not + allocated. + + In this case, vfio_bars_finalize() is called as part of the error flow + to free the bars' resources. However, vfio_bars_finalize() calls + object_unparent() for VFIOBAR->mr after checking only VFIOBAR->size, and + thus we get a null pointer dereference. + + Fix it by checking VFIOBAR->mr in vfio_bars_finalize(). + + Fixes: 89d5202edc50 ("vfio/pci: Allow relocating MSI-X MMIO") + Signed-off-by: Avihai Horon + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index ba40ca8784..9189459a38 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -1755,9 +1755,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) + + vfio_bar_quirk_finalize(vdev, i); + vfio_region_finalize(&bar->region); +- if (bar->size) { ++ if (bar->mr) { ++ assert(bar->size); + object_unparent(OBJECT(bar->mr)); + g_free(bar->mr); ++ bar->mr = NULL; + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch b/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch new file mode 100644 index 0000000..78a554d --- /dev/null +++ b/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch @@ -0,0 +1,196 @@ +From f68e8c5d841cd7fc785cc3d15b3c280211bfb4c3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 
17:46:57 +0200 +Subject: [PATCH 17/37] vfio: Implement a common device info helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [15/28] 9cfd233ab1b95dc7de776e8ef901823bd37c5a6b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 634f38f0f73f +Author: Alex Williamson +Date: Thu Jun 1 08:45:06 2023 -0600 + + vfio: Implement a common device info helper + + A common helper implementing the realloc algorithm for handling + capabilities. + + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Signed-off-by: Alex Williamson + Reviewed-by: Robin Voetter + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 37 ++++------------------------ + hw/vfio/common.c | 46 ++++++++++++++++++++++++++--------- + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 41 insertions(+), 43 deletions(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index f51190d466..59a2e03873 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -289,38 +289,11 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, + memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); + } + +-static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, +- uint32_t argsz) ++static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) + { +- struct vfio_device_info *info = g_malloc0(argsz); +- VFIOPCIDevice *vfio_pci; +- int fd; ++ VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + +- vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); +- fd = vfio_pci->vbasedev.fd; +- +- /* +- * If the specified argsz is not large enough to contain all capabilities +- * it will be updated upon return from the 
ioctl. Retry until we have +- * a big enough buffer to hold the entire capability chain. On error, +- * just exit and rely on CLP defaults. +- */ +-retry: +- info->argsz = argsz; +- +- if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { +- trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); +- g_free(info); +- return NULL; +- } +- +- if (info->argsz > argsz) { +- argsz = info->argsz; +- info = g_realloc(info, argsz); +- goto retry; +- } +- +- return info; ++ return vfio_get_device_info(vfio_pci->vbasedev.fd); + } + + /* +@@ -335,7 +308,7 @@ bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) + + assert(fh); + +- info = get_device_info(pbdev, sizeof(*info)); ++ info = get_device_info(pbdev); + if (!info) { + return false; + } +@@ -356,7 +329,7 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) + { + g_autofree struct vfio_device_info *info = NULL; + +- info = get_device_info(pbdev, sizeof(*info)); ++ info = get_device_info(pbdev); + if (!info) { + return; + } +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b73086e17a..3b4ac53f15 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -2845,11 +2845,35 @@ void vfio_put_group(VFIOGroup *group) + } + } + ++struct vfio_device_info *vfio_get_device_info(int fd) ++{ ++ struct vfio_device_info *info; ++ uint32_t argsz = sizeof(*info); ++ ++ info = g_malloc0(argsz); ++ ++retry: ++ info->argsz = argsz; ++ ++ if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { ++ g_free(info); ++ return NULL; ++ } ++ ++ if (info->argsz > argsz) { ++ argsz = info->argsz; ++ info = g_realloc(info, argsz); ++ goto retry; ++ } ++ ++ return info; ++} ++ + int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp) + { +- struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; +- int ret, fd; ++ g_autofree struct vfio_device_info *info = NULL; ++ int fd; + + fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + if (fd < 0) { +@@ -2861,11 +2885,11 @@ int vfio_get_device(VFIOGroup 
*group, const char *name, + return fd; + } + +- ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info); +- if (ret) { ++ info = vfio_get_device_info(fd); ++ if (!info) { + error_setg_errno(errp, errno, "error getting device info"); + close(fd); +- return ret; ++ return -1; + } + + /* +@@ -2893,14 +2917,14 @@ int vfio_get_device(VFIOGroup *group, const char *name, + vbasedev->group = group; + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); + +- vbasedev->num_irqs = dev_info.num_irqs; +- vbasedev->num_regions = dev_info.num_regions; +- vbasedev->flags = dev_info.flags; ++ vbasedev->num_irqs = info->num_irqs; ++ vbasedev->num_regions = info->num_regions; ++ vbasedev->flags = info->flags; ++ ++ trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs); + +- trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions, +- dev_info.num_irqs); ++ vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); + +- vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); + return 0; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 3dc5f2104c..6d1b8487c3 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -216,6 +216,7 @@ void vfio_region_finalize(VFIORegion *region); + void vfio_reset_handler(void *opaque); + VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); + void vfio_put_group(VFIOGroup *group); ++struct vfio_device_info *vfio_get_device_info(int fd); + int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp); + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch b/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch new file mode 100644 index 0000000..b8e72e6 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch @@ -0,0 +1,438 @@ +From 080d28c191b7d951f1f4596dcaa13d590c07d886 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 15/37] vfio/migration: Add VFIO migration pre-copy support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/28] 7b2ea1471440d47e5aed1211c96942ca7bface96 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit eda7362af959 +Author: Avihai Horon +Date: Wed Jun 21 14:12:00 2023 +0300 + + vfio/migration: Add VFIO migration pre-copy support + + Pre-copy support allows the VFIO device data to be transferred while the + VM is running. This helps to accommodate VFIO devices that have a large + amount of data that needs to be transferred, and it can reduce migration + downtime. + + Pre-copy support is optional in VFIO migration protocol v2. + Implement pre-copy of VFIO migration protocol v2 and use it for devices + that support it. Full description of it can be found in the following + Linux commit: 4db52602a607 ("vfio: Extend the device migration protocol + with PRE_COPY"). + + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + docs/devel/vfio-migration.rst | 35 +++++--- + hw/vfio/common.c | 6 +- + hw/vfio/migration.c | 165 ++++++++++++++++++++++++++++++++-- + hw/vfio/trace-events | 4 +- + include/hw/vfio/vfio-common.h | 2 + + 5 files changed, 190 insertions(+), 22 deletions(-) + +diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst +index 1b68ccf115..e896b2a673 100644 +--- a/docs/devel/vfio-migration.rst ++++ b/docs/devel/vfio-migration.rst +@@ -7,12 +7,14 @@ the guest is running on source host and restoring this saved state on the + destination host. 
This document details how saving and restoring of VFIO + devices is done in QEMU. + +-Migration of VFIO devices currently consists of a single stop-and-copy phase. +-During the stop-and-copy phase the guest is stopped and the entire VFIO device +-data is transferred to the destination. +- +-The pre-copy phase of migration is currently not supported for VFIO devices. +-Support for VFIO pre-copy will be added later on. ++Migration of VFIO devices consists of two phases: the optional pre-copy phase, ++and the stop-and-copy phase. The pre-copy phase is iterative and allows to ++accommodate VFIO devices that have a large amount of data that needs to be ++transferred. The iterative pre-copy phase of migration allows for the guest to ++continue whilst the VFIO device state is transferred to the destination, this ++helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy ++support by reporting the VFIO_MIGRATION_PRE_COPY flag in the ++VFIO_DEVICE_FEATURE_MIGRATION ioctl. + + Note that currently VFIO migration is supported only for a single device. This + is due to VFIO migration's lack of P2P support. However, P2P support is planned +@@ -29,10 +31,20 @@ VFIO implements the device hooks for the iterative approach as follows: + * A ``load_setup`` function that sets the VFIO device on the destination in + _RESUMING state. + ++* A ``state_pending_estimate`` function that reports an estimate of the ++ remaining pre-copy data that the vendor driver has yet to save for the VFIO ++ device. ++ + * A ``state_pending_exact`` function that reads pending_bytes from the vendor + driver, which indicates the amount of data that the vendor driver has yet to + save for the VFIO device. + ++* An ``is_active_iterate`` function that indicates ``save_live_iterate`` is ++ active only when the VFIO device is in pre-copy states. ++ ++* A ``save_live_iterate`` function that reads the VFIO device's data from the ++ vendor driver during iterative pre-copy phase. 
++ + * A ``save_state`` function to save the device config space if it is present. + + * A ``save_live_complete_precopy`` function that sets the VFIO device in +@@ -111,8 +123,10 @@ Flow of state changes during Live migration + =========================================== + + Below is the flow of state change during live migration. +-The values in the brackets represent the VM state, the migration state, and ++The values in the parentheses represent the VM state, the migration state, and + the VFIO device state, respectively. ++The text in the square brackets represents the flow if the VFIO device supports ++pre-copy. + + Live migration save path + ------------------------ +@@ -124,11 +138,12 @@ Live migration save path + | + migrate_init spawns migration_thread + Migration thread then calls each device's .save_setup() +- (RUNNING, _SETUP, _RUNNING) ++ (RUNNING, _SETUP, _RUNNING [_PRE_COPY]) + | +- (RUNNING, _ACTIVE, _RUNNING) +- If device is active, get pending_bytes by .state_pending_exact() ++ (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY]) ++ If device is active, get pending_bytes by .state_pending_{estimate,exact}() + If total pending_bytes >= threshold_size, call .save_live_iterate() ++ [Data of VFIO device for pre-copy phase is copied] + Iterate till total pending bytes converge and are less than threshold + | + On migration completion, vCPU stops and calls .save_live_complete_precopy for +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 78358ede27..b73086e17a 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -492,7 +492,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + } + + if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && +- migration->device_state == VFIO_DEVICE_STATE_RUNNING) { ++ (migration->device_state == VFIO_DEVICE_STATE_RUNNING || ++ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) { + return false; + } + } +@@ -537,7 +538,8 @@ static bool 
vfio_devices_all_running_and_mig_active(VFIOContainer *container) + return false; + } + +- if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) { ++ if (migration->device_state == VFIO_DEVICE_STATE_RUNNING || ++ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { + continue; + } else { + return false; +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 8d33414379..d8f6a22ae1 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -68,6 +68,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state) + return "STOP_COPY"; + case VFIO_DEVICE_STATE_RESUMING: + return "RESUMING"; ++ case VFIO_DEVICE_STATE_PRE_COPY: ++ return "PRE_COPY"; + default: + return "UNKNOWN STATE"; + } +@@ -241,6 +243,25 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, + return 0; + } + ++static int vfio_query_precopy_size(VFIOMigration *migration) ++{ ++ struct vfio_precopy_info precopy = { ++ .argsz = sizeof(precopy), ++ }; ++ ++ migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; ++ ++ if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) { ++ return -errno; ++ } ++ ++ migration->precopy_init_size = precopy.initial_bytes; ++ migration->precopy_dirty_size = precopy.dirty_bytes; ++ ++ return 0; ++} ++ + /* Returns the size of saved data on success and -errno on error */ + static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + { +@@ -249,6 +270,14 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + data_size = read(migration->data_fd, migration->data_buffer, + migration->data_buffer_size); + if (data_size < 0) { ++ /* ++ * Pre-copy emptied all the device state for now. For more information, ++ * please refer to the Linux kernel VFIO uAPI. 
++ */ ++ if (errno == ENOMSG) { ++ return 0; ++ } ++ + return -errno; + } + if (data_size == 0) { +@@ -265,6 +294,38 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + return qemu_file_get_error(f) ?: data_size; + } + ++static void vfio_update_estimated_pending_data(VFIOMigration *migration, ++ uint64_t data_size) ++{ ++ if (!data_size) { ++ /* ++ * Pre-copy emptied all the device state for now, update estimated sizes ++ * accordingly. ++ */ ++ migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; ++ ++ return; ++ } ++ ++ if (migration->precopy_init_size) { ++ uint64_t init_size = MIN(migration->precopy_init_size, data_size); ++ ++ migration->precopy_init_size -= init_size; ++ data_size -= init_size; ++ } ++ ++ migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size, ++ data_size); ++} ++ ++static bool vfio_precopy_supported(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ return migration->mig_flags & VFIO_MIGRATION_PRE_COPY; ++} ++ + /* ---------------------------------------------------------------------- */ + + static int vfio_save_setup(QEMUFile *f, void *opaque) +@@ -285,6 +346,28 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) + return -ENOMEM; + } + ++ if (vfio_precopy_supported(vbasedev)) { ++ int ret; ++ ++ switch (migration->device_state) { ++ case VFIO_DEVICE_STATE_RUNNING: ++ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY, ++ VFIO_DEVICE_STATE_RUNNING); ++ if (ret) { ++ return ret; ++ } ++ ++ vfio_query_precopy_size(migration); ++ ++ break; ++ case VFIO_DEVICE_STATE_STOP: ++ /* vfio_save_complete_precopy() will go to STOP_COPY */ ++ break; ++ default: ++ return -EINVAL; ++ } ++ } ++ + trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); +@@ -299,26 +382,42 @@ static void vfio_save_cleanup(void *opaque) + + g_free(migration->data_buffer); + migration->data_buffer = NULL; ++ 
migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; + vfio_migration_cleanup(vbasedev); + trace_vfio_save_cleanup(vbasedev->name); + } + ++static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy, ++ uint64_t *can_postcopy) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) { ++ return; ++ } ++ ++ *must_precopy += ++ migration->precopy_init_size + migration->precopy_dirty_size; ++ ++ trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy, ++ *can_postcopy, ++ migration->precopy_init_size, ++ migration->precopy_dirty_size); ++} ++ + /* + * Migration size of VFIO devices can be as little as a few KBs or as big as + * many GBs. This value should be big enough to cover the worst case. + */ + #define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) + +-/* +- * Only exact function is implemented and not estimate function. The reason is +- * that during pre-copy phase of migration the estimate function is called +- * repeatedly while pending RAM size is over the threshold, thus migration +- * can't converge and querying the VFIO device pending data size is useless. 
+- */ + static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + uint64_t *can_postcopy) + { + VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; + uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; + + /* +@@ -328,8 +427,48 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + vfio_query_stop_copy_size(vbasedev, &stop_copy_size); + *must_precopy += stop_copy_size; + ++ if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { ++ vfio_query_precopy_size(migration); ++ ++ *must_precopy += ++ migration->precopy_init_size + migration->precopy_dirty_size; ++ } ++ + trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, +- stop_copy_size); ++ stop_copy_size, migration->precopy_init_size, ++ migration->precopy_dirty_size); ++} ++ ++static bool vfio_is_active_iterate(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ++ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; ++} ++ ++static int vfio_save_iterate(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ssize_t data_size; ++ ++ data_size = vfio_save_block(f, migration); ++ if (data_size < 0) { ++ return data_size; ++ } ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ vfio_update_estimated_pending_data(migration, data_size); ++ ++ trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, ++ migration->precopy_dirty_size); ++ ++ /* ++ * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero. ++ * Return 1 so following handlers will not be potentially blocked. 
++ */ ++ return 1; + } + + static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) +@@ -338,7 +477,7 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + ssize_t data_size; + int ret; + +- /* We reach here with device state STOP only */ ++ /* We reach here with device state STOP or STOP_COPY only */ + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, + VFIO_DEVICE_STATE_STOP); + if (ret) { +@@ -457,7 +596,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + static const SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, ++ .state_pending_estimate = vfio_state_pending_estimate, + .state_pending_exact = vfio_state_pending_exact, ++ .is_active_iterate = vfio_is_active_iterate, ++ .save_live_iterate = vfio_save_iterate, + .save_live_complete_precopy = vfio_save_complete_precopy, + .save_state = vfio_save_state, + .load_setup = vfio_load_setup, +@@ -470,13 +612,18 @@ static const SaveVMHandlers savevm_vfio_handlers = { + static void vfio_vmstate_change(void *opaque, bool running, RunState state) + { + VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; + enum vfio_device_mig_state new_state; + int ret; + + if (running) { + new_state = VFIO_DEVICE_STATE_RUNNING; + } else { +- new_state = VFIO_DEVICE_STATE_STOP; ++ new_state = ++ (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY && ++ (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ? 
++ VFIO_DEVICE_STATE_STOP_COPY : ++ VFIO_DEVICE_STATE_STOP; + } + + /* +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 646e42fd27..4150b59e58 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -162,6 +162,8 @@ vfio_save_block(const char *name, int data_size) " (%s) data_size %d" + vfio_save_cleanup(const char *name) " (%s)" + vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" + vfio_save_device_config_state(const char *name) " (%s)" ++vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 +-vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 ++vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 ++vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 5f29dab839..1db901c194 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -67,6 +67,8 @@ typedef struct VFIOMigration { + void *data_buffer; + size_t data_buffer_size; + uint64_t mig_flags; ++ uint64_t precopy_init_size; ++ uint64_t precopy_dirty_size; + } 
VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch b/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch new file mode 100644 index 0000000..d87680d --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch @@ -0,0 +1,192 @@ +From 169dc1bb051b3aebc571936d956b49ba0621ae43 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 16/37] vfio/migration: Add support for switchover ack + capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [14/28] b3bd2eb2d0ca49ff05a0a82ae5bb956a354aed47 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 745c42912a04 +Author: Avihai Horon +Date: Wed Jun 21 14:12:01 2023 +0300 + + vfio/migration: Add support for switchover ack capability + + Loading of a VFIO device's data can take a substantial amount of time as + the device may need to allocate resources, prepare internal data + structures, etc. This can increase migration downtime, especially for + VFIO devices with a lot of resources. + + To solve this, VFIO migration uAPI defines "initial bytes" as part of + its precopy data stream. Initial bytes can be used in various ways to + improve VFIO migration performance. For example, it can be used to + transfer device metadata to pre-allocate resources in the destination. + However, for this to work we need to make sure that all initial bytes + are sent and loaded in the destination before the source VM is stopped. 
+ + Use migration switchover ack capability to make sure a VFIO device's + initial bytes are sent and loaded in the destination before the source + stops the VM and attempts to complete the migration. + This can significantly reduce migration downtime for some devices. + + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + docs/devel/vfio-migration.rst | 10 +++++++++ + hw/vfio/migration.c | 39 ++++++++++++++++++++++++++++++++++- + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 49 insertions(+), 1 deletion(-) + +diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst +index e896b2a673..b433cb5bb2 100644 +--- a/docs/devel/vfio-migration.rst ++++ b/docs/devel/vfio-migration.rst +@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy + support by reporting the VFIO_MIGRATION_PRE_COPY flag in the + VFIO_DEVICE_FEATURE_MIGRATION ioctl. + ++When pre-copy is supported, it's possible to further reduce downtime by ++enabling "switchover-ack" migration capability. ++VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream ++and recommends that the initial bytes are sent and loaded in the destination ++before stopping the source VM. Enabling this migration capability will ++guarantee that and thus, can potentially reduce downtime even further. ++ + Note that currently VFIO migration is supported only for a single device. This + is due to VFIO migration's lack of P2P support. However, P2P support is planned + to be added later on. +@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach as follows: + * A ``save_live_iterate`` function that reads the VFIO device's data from the + vendor driver during iterative pre-copy phase. 
+ ++* A ``switchover_ack_needed`` function that checks if the VFIO device uses ++ "switchover-ack" migration capability when this capability is enabled. ++ + * A ``save_state`` function to save the device config space if it is present. + + * A ``save_live_complete_precopy`` function that sets the VFIO device in +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index d8f6a22ae1..acbf0bb7ab 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -18,6 +18,8 @@ + #include "sysemu/runstate.h" + #include "hw/vfio/vfio-common.h" + #include "migration/migration.h" ++#include "migration/options.h" ++#include "migration/savevm.h" + #include "migration/vmstate.h" + #include "migration/qemu-file.h" + #include "migration/register.h" +@@ -45,6 +47,7 @@ + #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) + #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) + #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) ++#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) + + /* + * This is an arbitrary size based on migration of mlx5 devices, where typically +@@ -384,6 +387,7 @@ static void vfio_save_cleanup(void *opaque) + migration->data_buffer = NULL; + migration->precopy_init_size = 0; + migration->precopy_dirty_size = 0; ++ migration->initial_data_sent = false; + vfio_migration_cleanup(vbasedev); + trace_vfio_save_cleanup(vbasedev->name); + } +@@ -457,10 +461,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) + if (data_size < 0) { + return data_size; + } +- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + vfio_update_estimated_pending_data(migration, data_size); + ++ if (migrate_switchover_ack() && !migration->precopy_init_size && ++ !migration->initial_data_sent) { ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT); ++ migration->initial_data_sent = true; ++ } else { ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ } ++ + trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, + 
migration->precopy_dirty_size); + +@@ -579,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + } + break; + } ++ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT: ++ { ++ if (!vfio_precopy_supported(vbasedev) || ++ !migrate_switchover_ack()) { ++ error_report("%s: Received INIT_DATA_SENT but switchover ack " ++ "is not used", vbasedev->name); ++ return -EINVAL; ++ } ++ ++ ret = qemu_loadvm_approve_switchover(); ++ if (ret) { ++ error_report( ++ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)", ++ vbasedev->name, ret, strerror(-ret)); ++ } ++ ++ return ret; ++ } + default: + error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); + return -EINVAL; +@@ -593,6 +622,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + return ret; + } + ++static bool vfio_switchover_ack_needed(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ return vfio_precopy_supported(vbasedev); ++} ++ + static const SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, +@@ -605,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { + .load_setup = vfio_load_setup, + .load_cleanup = vfio_load_cleanup, + .load_state = vfio_load_state, ++ .switchover_ack_needed = vfio_switchover_ack_needed, + }; + + /* ---------------------------------------------------------------------- */ +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 1db901c194..3dc5f2104c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -69,6 +69,7 @@ typedef struct VFIOMigration { + uint64_t mig_flags; + uint64_t precopy_init_size; + uint64_t precopy_dirty_size; ++ bool initial_data_sent; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch b/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch new file mode 100644 index 
0000000..da43b4b --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch @@ -0,0 +1,90 @@ +From df7814de08c8c7c45eacb7b9d9ead9be4d1e3baf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 11 Sep 2023 16:10:19 +0200 +Subject: [PATCH 4/4] vfio/migration: Block VFIO migration with postcopy + migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled +RH-Bugzilla: 2229868 +RH-Acked-by: Alex Williamson +RH-Acked-by: Peter Xu +RH-Commit: [4/4] 36eedf879547044c2ba2763fb48784a95f9e4ea7 + +Bugzilla: https://bugzilla.redhat.com/2229868 + +commit bf7ef7a2da3e61dc104f26c679c9465e3fbe7dde +Author: Avihai Horon +Date: Wed Sep 6 18:08:52 2023 +0300 + + vfio/migration: Block VFIO migration with postcopy migration + + VFIO migration is not compatible with postcopy migration. A VFIO device + in the destination can't handle page faults for pages that have not been + sent yet. + + Doing such migration will cause the VM to crash in the destination: + + qemu-system-x86_64: VFIO_MAP_DMA failed: Bad address + qemu-system-x86_64: vfio_dma_map(0x55a28c7659d0, 0xc0000, 0xb000, 0x7f1b11a00000) = -14 (Bad address) + qemu: hardware error: vfio: DMA mapping failed, unable to continue + + To prevent this, block VFIO migration with postcopy migration. 
+ + Reported-by: Yanghang Liu + Signed-off-by: Avihai Horon + Tested-by: Yanghang Liu + Reviewed-by: Peter Xu + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 2674f4bc47..4f018c7531 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -331,6 +331,27 @@ static bool vfio_precopy_supported(VFIODevice *vbasedev) + + /* ---------------------------------------------------------------------- */ + ++static int vfio_save_prepare(void *opaque, Error **errp) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ /* ++ * Snapshot doesn't use postcopy, so allow snapshot even if postcopy is on. ++ */ ++ if (runstate_check(RUN_STATE_SAVE_VM)) { ++ return 0; ++ } ++ ++ if (migrate_postcopy_ram()) { ++ error_setg( ++ errp, "%s: VFIO migration is not supported with postcopy migration", ++ vbasedev->name); ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ + static int vfio_save_setup(QEMUFile *f, void *opaque) + { + VFIODevice *vbasedev = opaque; +@@ -630,6 +651,7 @@ static bool vfio_switchover_ack_needed(void *opaque) + } + + static const SaveVMHandlers savevm_vfio_handlers = { ++ .save_prepare = vfio_save_prepare, + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, + .state_pending_estimate = vfio_state_pending_estimate, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch b/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch new file mode 100644 index 0000000..dde2e24 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch @@ -0,0 +1,171 @@ +From 35c7d0d3b02d61d6f29afae74bd83edd70a6a1b4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 26/37] vfio/migration: Change vIOMMU blocker from global to + per device 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [24/28] 8fda1c82a81fadd4f38e6a5e878c9228a81c0f6e (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 3c26c80a0a26 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:07 2023 +0800 + + vfio/migration: Change vIOMMU blocker from global to per device + + Contrary to multiple device blocker which needs to consider already-attached + devices to unblock/block dynamically, the vIOMMU migration blocker is a device + specific config. Meaning it only needs to know whether the device is bypassing + or not the vIOMMU (via machine property, or per pxb-pcie::bypass_iommu), and + does not need the state of currently present devices. For this reason, the + vIOMMU global migration blocker can be consolidated into the per-device + migration blocker, allowing us to remove some unnecessary code. + + This change also makes vfio_mig_active() more accurate as it doesn't check for + global blocker. 
+ + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 51 ++--------------------------------- + hw/vfio/migration.c | 7 ++--- + hw/vfio/pci.c | 1 - + include/hw/vfio/vfio-common.h | 3 +-- + 4 files changed, 7 insertions(+), 55 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 136d8243d6..e815f6ba30 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -362,7 +362,6 @@ bool vfio_mig_active(void) + } + + static Error *multiple_devices_migration_blocker; +-static Error *giommu_migration_blocker; + + static unsigned int vfio_migratable_device_num(void) + { +@@ -420,55 +419,9 @@ void vfio_unblock_multiple_devices_migration(void) + multiple_devices_migration_blocker = NULL; + } + +-static bool vfio_viommu_preset(void) ++bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- VFIOAddressSpace *space; +- +- QLIST_FOREACH(space, &vfio_address_spaces, list) { +- if (space->as != &address_space_memory) { +- return true; +- } +- } +- +- return false; +-} +- +-int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) +-{ +- int ret; +- +- if (giommu_migration_blocker || +- !vfio_viommu_preset()) { +- return 0; +- } +- +- if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { +- error_setg(errp, +- "Migration is currently not supported with vIOMMU enabled"); +- return -EINVAL; +- } +- +- error_setg(&giommu_migration_blocker, +- "Migration is currently not supported with vIOMMU enabled"); +- ret = migrate_add_blocker(giommu_migration_blocker, errp); +- if (ret < 0) { +- error_free(giommu_migration_blocker); +- giommu_migration_blocker = NULL; +- } +- +- return ret; +-} +- +-void vfio_migration_finalize(void) +-{ +- if (!giommu_migration_blocker || +- vfio_viommu_preset()) { +- return; +- } +- +- migrate_del_blocker(giommu_migration_blocker); +- error_free(giommu_migration_blocker); +- giommu_migration_blocker = NULL; 
++ return vbasedev->group->container->space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 1db7d52ab2..e6e5e85f75 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -878,9 +878,10 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + return ret; + } + +- ret = vfio_block_giommu_migration(vbasedev, errp); +- if (ret) { +- return ret; ++ if (vfio_viommu_preset(vbasedev)) { ++ error_setg(&err, "%s: Migration is currently not supported " ++ "with vIOMMU enabled", vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); + } + + trace_vfio_migration_realize(vbasedev->name); +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 2d059832a4..922c81872c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3279,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj) + */ + vfio_put_device(vdev); + vfio_put_group(group); +- vfio_migration_finalize(); + } + + static void vfio_exitfn(PCIDevice *pdev) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 93429b9abb..45167c8a8a 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -227,7 +227,7 @@ extern VFIOGroupList vfio_group_list; + bool vfio_mig_active(void); + int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); + void vfio_unblock_multiple_devices_migration(void); +-int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); ++bool vfio_viommu_preset(VFIODevice *vbasedev); + int64_t vfio_mig_bytes_transferred(void); + void vfio_reset_bytes_transferred(void); + +@@ -254,6 +254,5 @@ int vfio_spapr_remove_window(VFIOContainer *container, + + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); +-void vfio_migration_finalize(void); + + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git 
a/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch b/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch new file mode 100644 index 0000000..9deaf1a --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch @@ -0,0 +1,145 @@ +From a36fa46369fe9bf2a2174e9ed6ab83042e904066 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 27/37] vfio/migration: Free resources when + vfio_migration_realize fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [25/28] b3ab8d3443d4bc12a689dc7d88a94da315814bb7 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 2b43b2995b02 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:08 2023 +0800 + + vfio/migration: Free resources when vfio_migration_realize fails + + When vfio_realize() succeeds, hot unplug will call vfio_exitfn() + to free resources allocated in vfio_realize(); when vfio_realize() + fails, vfio_exitfn() is never called and we need to free resources + in vfio_realize(). + + In the case that vfio_migration_realize() fails, + e.g: with -only-migratable & enable-migration=off, we see below: + + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off + 0000:81:11.1: Migration disabled + Error: disallowing migration blocker (--only-migratable) for: 0000:81:11.1: Migration is disabled for VFIO device + + If we hotplug again we should see same log as above, but we see: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off + Error: vfio 0000:81:11.1: device is already attached + + That's because some references to VFIO device isn't released. 
+ For resources allocated in vfio_migration_realize(), free them by + jumping to out_deinit path with calling a new function + vfio_migration_deinit(). For resources allocated in vfio_realize(), + free them by jumping to de-register path in vfio_realize(). + + Signed-off-by: Zhenzhong Duan + Fixes: a22651053b59 ("vfio: Make vfio-pci device migration capable") + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 33 +++++++++++++++++++++++---------- + hw/vfio/pci.c | 1 + + 2 files changed, 24 insertions(+), 10 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index e6e5e85f75..e3954570c8 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev) + return 0; + } + ++static void vfio_migration_deinit(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ remove_migration_state_change_notifier(&migration->migration_state); ++ qemu_del_vm_change_state_handler(migration->vm_state); ++ unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); ++ vfio_migration_free(vbasedev); ++ vfio_unblock_multiple_devices_migration(); ++} ++ + static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) + { + int ret; +@@ -866,7 +877,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + error_setg(&err, + "%s: VFIO device doesn't support device dirty tracking", + vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ goto add_blocker; + } + + warn_report("%s: VFIO device doesn't support device dirty tracking", +@@ -875,29 +886,31 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + + ret = vfio_block_multiple_devices_migration(vbasedev, errp); + if (ret) { +- return ret; ++ goto out_deinit; + } + + if (vfio_viommu_preset(vbasedev)) { + error_setg(&err, "%s: Migration is currently not 
supported " + "with vIOMMU enabled", vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ goto add_blocker; + } + + trace_vfio_migration_realize(vbasedev->name); + return 0; ++ ++add_blocker: ++ ret = vfio_block_migration(vbasedev, err, errp); ++out_deinit: ++ if (ret) { ++ vfio_migration_deinit(vbasedev); ++ } ++ return ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) + { + if (vbasedev->migration) { +- VFIOMigration *migration = vbasedev->migration; +- +- remove_migration_state_change_notifier(&migration->migration_state); +- qemu_del_vm_change_state_handler(migration->vm_state); +- unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); +- vfio_migration_free(vbasedev); +- vfio_unblock_multiple_devices_migration(); ++ vfio_migration_deinit(vbasedev); + } + + if (vbasedev->migration_blocker) { +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 922c81872c..037b7d4176 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3234,6 +3234,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ret = vfio_migration_realize(vbasedev, errp); + if (ret) { + error_report("%s: Migration disabled", vbasedev->name); ++ goto out_deregister; + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch b/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch new file mode 100644 index 0000000..3258541 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch @@ -0,0 +1,283 @@ +From 747c34c0a3b8048ebdab387d22f2b922c81d572a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 21/37] vfio/migration: Make VFIO migration non-experimental +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: 
Miroslav Rezanina +RH-Commit: [19/28] 2f457c1c0de95a3fced0270f2edbbc5193cc4de9 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 8bbcb64a71d8 +Author: Avihai Horon +Date: Wed Jun 28 10:31:12 2023 +0300 + + vfio/migration: Make VFIO migration non-experimental + + The major parts of VFIO migration are supported today in QEMU. This + includes basic VFIO migration, device dirty page tracking and precopy + support. + + Thus, at this point in time, it seems appropriate to make VFIO migration + non-experimental: remove the x prefix from enable_migration property, + change it to ON_OFF_AUTO and let the default value be AUTO. + + In addition, make the following adjustments: + 1. When enable_migration is ON and migration is not supported, fail VFIO + device realization. + 2. When enable_migration is AUTO (i.e., not explicitly enabled), require + device dirty tracking support. This is because device dirty tracking + is currently the only method to do dirty page tracking, which is + essential for migrating in a reasonable downtime. Setting + enable_migration to ON will not require device dirty tracking. + 3. Make migration error and blocker messages more elaborate. + 4. Remove error prints in vfio_migration_query_flags(). + 5. Rename trace_vfio_migration_probe() to + trace_vfio_migration_realize(). 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 16 ++++++- + hw/vfio/migration.c | 79 +++++++++++++++++++++++------------ + hw/vfio/pci.c | 4 +- + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 6 +-- + 5 files changed, 73 insertions(+), 34 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 3b4ac53f15..136d8243d6 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -381,7 +381,7 @@ static unsigned int vfio_migratable_device_num(void) + return device_num; + } + +-int vfio_block_multiple_devices_migration(Error **errp) ++int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) + { + int ret; + +@@ -390,6 +390,12 @@ int vfio_block_multiple_devices_migration(Error **errp) + return 0; + } + ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_setg(errp, "Migration is currently not supported with multiple " ++ "VFIO devices"); ++ return -EINVAL; ++ } ++ + error_setg(&multiple_devices_migration_blocker, + "Migration is currently not supported with multiple " + "VFIO devices"); +@@ -427,7 +433,7 @@ static bool vfio_viommu_preset(void) + return false; + } + +-int vfio_block_giommu_migration(Error **errp) ++int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) + { + int ret; + +@@ -436,6 +442,12 @@ int vfio_block_giommu_migration(Error **errp) + return 0; + } + ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_setg(errp, ++ "Migration is currently not supported with vIOMMU enabled"); ++ return -EINVAL; ++ } ++ + error_setg(&giommu_migration_blocker, + "Migration is currently not supported with vIOMMU enabled"); + ret = migrate_add_blocker(giommu_migration_blocker, errp); +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 7cf143926c..1db7d52ab2 100644 +--- a/hw/vfio/migration.c ++++ 
b/hw/vfio/migration.c +@@ -724,14 +724,6 @@ static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { +- if (errno == ENOTTY) { +- error_report("%s: VFIO migration is not supported in kernel", +- vbasedev->name); +- } else { +- error_report("%s: Failed to query VFIO migration support, err: %s", +- vbasedev->name, strerror(errno)); +- } +- + return -errno; + } + +@@ -810,6 +802,27 @@ static int vfio_migration_init(VFIODevice *vbasedev) + return 0; + } + ++static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) ++{ ++ int ret; ++ ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_propagate(errp, err); ++ return -EINVAL; ++ } ++ ++ vbasedev->migration_blocker = error_copy(err); ++ error_free(err); ++ ++ ret = migrate_add_blocker(vbasedev->migration_blocker, errp); ++ if (ret < 0) { ++ error_free(vbasedev->migration_blocker); ++ vbasedev->migration_blocker = NULL; ++ } ++ ++ return ret; ++} ++ + /* ---------------------------------------------------------------------- */ + + int64_t vfio_mig_bytes_transferred(void) +@@ -824,40 +837,54 @@ void vfio_reset_bytes_transferred(void) + + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { +- int ret = -ENOTSUP; ++ Error *err = NULL; ++ int ret; + +- if (!vbasedev->enable_migration) { +- goto add_blocker; ++ if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { ++ error_setg(&err, "%s: Migration is disabled for VFIO device", ++ vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); + } + + ret = vfio_migration_init(vbasedev); + if (ret) { +- goto add_blocker; ++ if (ret == -ENOTTY) { ++ error_setg(&err, "%s: VFIO migration is not supported in kernel", ++ vbasedev->name); ++ } else { ++ error_setg(&err, ++ "%s: Migration couldn't be initialized for VFIO device, " ++ "err: %d 
(%s)", ++ vbasedev->name, ret, strerror(-ret)); ++ } ++ ++ return vfio_block_migration(vbasedev, err, errp); ++ } ++ ++ if (!vbasedev->dirty_pages_supported) { ++ if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { ++ error_setg(&err, ++ "%s: VFIO device doesn't support device dirty tracking", ++ vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); ++ } ++ ++ warn_report("%s: VFIO device doesn't support device dirty tracking", ++ vbasedev->name); + } + +- ret = vfio_block_multiple_devices_migration(errp); ++ ret = vfio_block_multiple_devices_migration(vbasedev, errp); + if (ret) { + return ret; + } + +- ret = vfio_block_giommu_migration(errp); ++ ret = vfio_block_giommu_migration(vbasedev, errp); + if (ret) { + return ret; + } + +- trace_vfio_migration_probe(vbasedev->name); ++ trace_vfio_migration_realize(vbasedev->name); + return 0; +- +-add_blocker: +- error_setg(&vbasedev->migration_blocker, +- "VFIO device doesn't support migration"); +- +- ret = migrate_add_blocker(vbasedev->migration_blocker, errp); +- if (ret < 0) { +- error_free(vbasedev->migration_blocker); +- vbasedev->migration_blocker = NULL; +- } +- return ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 15e7554954..6634945a70 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3371,8 +3371,8 @@ static Property vfio_pci_dev_properties[] = { + VFIO_FEATURE_ENABLE_REQ_BIT, true), + DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), +- DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, +- vbasedev.enable_migration, false), ++ DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, ++ vbasedev.enable_migration, ON_OFF_AUTO_AUTO), + DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), + DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, + vbasedev.ram_block_discard_allowed, false), +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events 
+index 4150b59e58..0391bd583b 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -155,7 +155,7 @@ vfio_load_cleanup(const char *name) " (%s)" + vfio_load_device_config_state(const char *name) " (%s)" + vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 + vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" +-vfio_migration_probe(const char *name) " (%s)" ++vfio_migration_realize(const char *name) " (%s)" + vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" + vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" + vfio_save_block(const char *name, int data_size) " (%s) data_size %d" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 1d19c6f251..93429b9abb 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -139,7 +139,7 @@ typedef struct VFIODevice { + bool needs_reset; + bool no_mmap; + bool ram_block_discard_allowed; +- bool enable_migration; ++ OnOffAuto enable_migration; + VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; +@@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + extern VFIOGroupList vfio_group_list; + + bool vfio_mig_active(void); +-int vfio_block_multiple_devices_migration(Error **errp); ++int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); + void vfio_unblock_multiple_devices_migration(void); +-int vfio_block_giommu_migration(Error **errp); ++int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); + int64_t vfio_mig_bytes_transferred(void); + void vfio_reset_bytes_transferred(void); + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch b/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch new file mode 100644 index 0000000..3b61c5d --- /dev/null +++ 
b/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch @@ -0,0 +1,102 @@ +From edcf24a08d66d620a10c746824e31d230c8516ce Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 13/37] vfio/migration: Refactor vfio_save_block() to return + saved data size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/28] b4aed6ddcbde159e98275a0675dcdf45d644673b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit cf53efbbda2e +Author: Avihai Horon +Date: Wed Jun 21 14:11:58 2023 +0300 + + vfio/migration: Refactor vfio_save_block() to return saved data size + + Refactor vfio_save_block() to return the size of saved data on success + and -errno on error. + + This will be used in next patch to implement VFIO migration pre-copy + support. 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Reviewed-by: Juan Quintela + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 6b58dddb88..235978fd68 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -241,8 +241,8 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, + return 0; + } + +-/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */ +-static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) ++/* Returns the size of saved data on success and -errno on error */ ++static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + { + ssize_t data_size; + +@@ -252,7 +252,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) + return -errno; + } + if (data_size == 0) { +- return 1; ++ return 0; + } + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); +@@ -262,7 +262,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) + + trace_vfio_save_block(migration->vbasedev->name, data_size); + +- return qemu_file_get_error(f); ++ return qemu_file_get_error(f) ?: data_size; + } + + /* ---------------------------------------------------------------------- */ +@@ -335,6 +335,7 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + { + VFIODevice *vbasedev = opaque; ++ ssize_t data_size; + int ret; + + /* We reach here with device state STOP only */ +@@ -345,11 +346,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + } + + do { +- ret = vfio_save_block(f, vbasedev->migration); +- if (ret < 0) { +- return ret; ++ data_size = vfio_save_block(f, vbasedev->migration); ++ if (data_size < 0) { ++ return data_size; + } +- } 
while (!ret); ++ } while (data_size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + ret = qemu_file_get_error(f); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch b/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch new file mode 100644 index 0000000..ad3c6ca --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch @@ -0,0 +1,56 @@ +From 5bb94c4eaeb94f0b41a57660098a4c12a295b725 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 28/37] vfio/migration: Remove print of "Migration disabled" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [26/28] c7ff1f9c90b4cfcb327ef474042ea71ea577a94d (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0520d63c7701 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:09 2023 +0800 + + vfio/migration: Remove print of "Migration disabled" + + Property enable_migration supports [on/off/auto]. + In ON mode, error pointer is passed to errp and logged. + In OFF mode, we doesn't need to log "Migration disabled" as it's intentional. + In AUTO mode, we should only ever see errors or warnings if the device + supports migration and an error or incompatibility occurs while further + probing or configuring it. Lack of support for migration shoundn't + generate an error or warning. 
+ + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 037b7d4176..a60b868c38 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3233,7 +3233,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + if (!pdev->failover_pair_id) { + ret = vfio_migration_realize(vbasedev, errp); + if (ret) { +- error_report("%s: Migration disabled", vbasedev->name); + goto out_deregister; + } + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch b/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch new file mode 100644 index 0000000..2666460 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch @@ -0,0 +1,165 @@ +From a63b4010ba4f491c9144afff363bebcf35ecf496 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 20/37] vfio/migration: Reset bytes_transferred properly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [18/28] e9a70faeca4fd5aa7ef36502cf76bf0b62f65057 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 808642a2f640 +Author: Avihai Horon +Date: Wed Jun 28 10:31:11 2023 +0300 + + vfio/migration: Reset bytes_transferred properly + + Currently, VFIO bytes_transferred is not reset properly: + 1. bytes_transferred is not reset after a VM snapshot (so a migration + following a snapshot will report incorrect value). + 2. 
bytes_transferred is a single counter for all VFIO devices, however + upon migration failure it is reset multiple times, by each VFIO + device. + + Fix it by introducing a new function vfio_reset_bytes_transferred() and + calling it during migration and snapshot start. + + Remove existing bytes_transferred reset in VFIO migration state + notifier, which is not needed anymore. + + Fixes: 3710586caa5d ("qapi: Add VFIO devices migration stats in Migration stats") + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c + migration/savevm.c + context changes due to commit aff3f6606d14 ("migration: Rename + ram_counters to mig_stats") + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 6 +++++- + include/hw/vfio/vfio-common.h | 1 + + migration/migration.c | 1 + + migration/migration.h | 1 + + migration/savevm.c | 1 + + migration/target.c | 17 +++++++++++++++-- + 6 files changed, 24 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index acbf0bb7ab..7cf143926c 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -697,7 +697,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) + case MIGRATION_STATUS_CANCELLING: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_FAILED: +- bytes_transferred = 0; + /* + * If setting the device in RUNNING state fails, the device should + * be reset. To do so, use ERROR state as a recover state. 
+@@ -818,6 +817,11 @@ int64_t vfio_mig_bytes_transferred(void) + return bytes_transferred; + } + ++void vfio_reset_bytes_transferred(void) ++{ ++ bytes_transferred = 0; ++} ++ + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { + int ret = -ENOTSUP; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 6d1b8487c3..1d19c6f251 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -229,6 +229,7 @@ int vfio_block_multiple_devices_migration(Error **errp); + void vfio_unblock_multiple_devices_migration(void); + int vfio_block_giommu_migration(Error **errp); + int64_t vfio_mig_bytes_transferred(void); ++void vfio_reset_bytes_transferred(void); + + #ifdef CONFIG_LINUX + int vfio_get_region_info(VFIODevice *vbasedev, int index, +diff --git a/migration/migration.c b/migration/migration.c +index 9bf1caee6c..47ad6c43cb 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1638,6 +1638,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + */ + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); ++ reset_vfio_bytes_transferred(); + + return true; + } +diff --git a/migration/migration.h b/migration/migration.h +index e9679f8029..7ccf460aa2 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -495,6 +495,7 @@ bool migration_rate_limit(void); + void migration_cancel(const Error *error); + + void populate_vfio_info(MigrationInfo *info); ++void reset_vfio_bytes_transferred(void); + void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); + + #endif +diff --git a/migration/savevm.c b/migration/savevm.c +index aff70e6263..83088fc3f8 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1620,6 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + migrate_init(ms); + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, 
sizeof(compression_counters)); ++ reset_vfio_bytes_transferred(); + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +diff --git a/migration/target.c b/migration/target.c +index 00ca007f97..f39c9a8d88 100644 +--- a/migration/target.c ++++ b/migration/target.c +@@ -14,12 +14,25 @@ + #include "hw/vfio/vfio-common.h" + #endif + ++#ifdef CONFIG_VFIO + void populate_vfio_info(MigrationInfo *info) + { +-#ifdef CONFIG_VFIO + if (vfio_mig_active()) { + info->vfio = g_malloc0(sizeof(*info->vfio)); + info->vfio->transferred = vfio_mig_bytes_transferred(); + } +-#endif + } ++ ++void reset_vfio_bytes_transferred(void) ++{ ++ vfio_reset_bytes_transferred(); ++} ++#else ++void populate_vfio_info(MigrationInfo *info) ++{ ++} ++ ++void reset_vfio_bytes_transferred(void) ++{ ++} ++#endif +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch b/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch new file mode 100644 index 0000000..efd42a9 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch @@ -0,0 +1,125 @@ +From 223eef8363c9ba58514b2d4f93e5ff015d111ff2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 29/37] vfio/migration: Return bool type for + vfio_migration_realize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [27/28] d5aea3ea4c53e4573076cbacbbe3134f9f0f9e53 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit d4a2af747d5a +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:10 2023 +0800 + + vfio/migration: Return bool type for vfio_migration_realize() + + Make vfio_migration_realize() adhere to the convention of other realize() + callbacks(like 
qdev_realize) by returning bool instead of int. + + Suggested-by: Cédric Le Goater + Suggested-by: Joao Martins + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 15 ++++++++++----- + hw/vfio/pci.c | 3 +-- + include/hw/vfio/vfio-common.h | 2 +- + 3 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index e3954570c8..2674f4bc47 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -846,7 +846,12 @@ void vfio_reset_bytes_transferred(void) + bytes_transferred = 0; + } + +-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) ++/* ++ * Return true when either migration initialized or blocker registered. ++ * Currently only return false when adding blocker fails which will ++ * de-register vfio device. ++ */ ++bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { + Error *err = NULL; + int ret; +@@ -854,7 +859,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { + error_setg(&err, "%s: Migration is disabled for VFIO device", + vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ return !vfio_block_migration(vbasedev, err, errp); + } + + ret = vfio_migration_init(vbasedev); +@@ -869,7 +874,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + vbasedev->name, ret, strerror(-ret)); + } + +- return vfio_block_migration(vbasedev, err, errp); ++ return !vfio_block_migration(vbasedev, err, errp); + } + + if (!vbasedev->dirty_pages_supported) { +@@ -896,7 +901,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + } + + trace_vfio_migration_realize(vbasedev->name); +- return 0; ++ return true; + + add_blocker: + ret = vfio_block_migration(vbasedev, err, errp); +@@ -904,7 +909,7 @@ out_deinit: + if (ret) { + 
vfio_migration_deinit(vbasedev); + } +- return ret; ++ return !ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index a60b868c38..ba40ca8784 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3231,8 +3231,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + } + + if (!pdev->failover_pair_id) { +- ret = vfio_migration_realize(vbasedev, errp); +- if (ret) { ++ if (!vfio_migration_realize(vbasedev, errp)) { + goto out_deregister; + } + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 45167c8a8a..da43d27352 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -252,7 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container, + int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space); + +-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); ++bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch b/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch new file mode 100644 index 0000000..6211db7 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch @@ -0,0 +1,68 @@ +From 76208f7824d5139ac8d86140b0e01031b67638cc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 04/37] vfio/migration: Skip log_sync during migration SETUP + state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/28] 4c340992b472ac4627b57705f4e971f14bbb0846 (clegoate/qemu-kvm-c9s) + 
+Bugzilla: https://bugzilla.redhat.com/2192818 + +commit ff180c6bd7a8 +Author: Avihai Horon +Date: Mon Apr 3 16:00:00 2023 +0300 + + vfio/migration: Skip log_sync during migration SETUP state + + Currently, VFIO log_sync can be issued while migration is in SETUP + state. However, doing this log_sync is at best redundant and at worst + can fail. + + Redundant -- all RAM is marked dirty in migration SETUP state and is + transferred only after migration is set to ACTIVE state, so doing + log_sync during migration SETUP is pointless. + + Can fail -- there is a time window, between setting migration state to + SETUP and starting dirty tracking by RAM save_live_setup handler, during + which dirty tracking is still not started. Any VFIO log_sync call that + is issued during this time window will fail. For example, this error can + be triggered by migrating a VM when a GUI is active, which constantly + calls log_sync. + + Fix it by skipping VFIO log_sync while migration is in SETUP state. + + Fixes: 758b96b61d5c ("vfio/migrate: Move switch of dirty tracking into vfio_memory_listener") + Signed-off-by: Avihai Horon + Link: https://lore.kernel.org/r/20230403130000.6422-1-avihaih@nvidia.com + Signed-off-by: Alex Williamson + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4d01ea3515..78358ede27 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -478,7 +478,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +- if (!migration_is_setup_or_active(ms->state)) { ++ if (ms->state != MIGRATION_STATUS_ACTIVE && ++ ms->state != MIGRATION_STATUS_DEVICE) { + return false; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch b/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch new file mode 
100644 index 0000000..2db8511 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch @@ -0,0 +1,70 @@ +From 77353cdafd08562dff9c99e9f3984d12224bee52 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 14/37] vfio/migration: Store VFIO migration flags in + VFIOMigration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/28] 31a9c39e6ee6338a35dc08c3e7f5c1a204166249 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 6cd1fe11598a +Author: Avihai Horon +Date: Wed Jun 21 14:11:59 2023 +0300 + + vfio/migration: Store VFIO migration flags in VFIOMigration + + VFIO migration flags are queried once in vfio_migration_init(). Store + them in VFIOMigration so they can be used later to check the device's + migration capabilities without re-querying them. + + This will be used in the next patch to check if the device supports + precopy migration. 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 1 + + include/hw/vfio/vfio-common.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 235978fd68..8d33414379 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -603,6 +603,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) + migration->vbasedev = vbasedev; + migration->device_state = VFIO_DEVICE_STATE_RUNNING; + migration->data_fd = -1; ++ migration->mig_flags = mig_flags; + + vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev); + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index eed244f25f..5f29dab839 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -66,6 +66,7 @@ typedef struct VFIOMigration { + int data_fd; + void *data_buffer; + size_t data_buffer_size; ++ uint64_t mig_flags; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch b/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch new file mode 100644 index 0000000..b5d9d37 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch @@ -0,0 +1,67 @@ +From b5a69101abac153c9c9be7f539d810e3e4af3bdf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 19/37] vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI + retry path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [17/28] 
2067bb58f3a2c1a793e5566cee3c78a8299c9c1c (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit c17408892319 +Author: Shameer Kolothum +Date: Tue Jun 13 15:09:43 2023 +0100 + + vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI retry path + + When vfio_enable_vectors() returns with less than requested nr_vectors + we retry with what kernel reported back. But the retry path doesn't + call vfio_prepare_kvm_msi_virq_batch() and this results in, + + qemu-system-aarch64: vfio: Error: Failed to enable 4 MSI vectors, retry with 1 + qemu-system-aarch64: ../hw/vfio/pci.c:602: vfio_commit_kvm_msi_virq_batch: Assertion `vdev->defer_kvm_irq_routing' failed + + Fixes: dc580d51f7dd ("vfio: defer to commit kvm irq routing when enable msi/msix") + Reviewed-by: Longpeng + Signed-off-by: Shameer Kolothum + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 7c5e2b5996..15e7554954 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -666,6 +666,8 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) + + vfio_disable_interrupts(vdev); + ++ vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); ++retry: + /* + * Setting vector notifiers needs to enable route for each vector. 
+ * Deferring to commit the KVM routes once rather than per vector +@@ -673,8 +675,6 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) + */ + vfio_prepare_kvm_msi_virq_batch(vdev); + +- vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); +-retry: + vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); + + for (i = 0; i < vdev->nr_vectors; i++) { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch b/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch new file mode 100644 index 0000000..0aca4ef --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch @@ -0,0 +1,54 @@ +From 816c20b23546d31316c9ca450db8a6668ac6216c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 25/37] vfio/pci: Disable INTx in vfio_realize error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [23/28] 2fde4bad00c4286e6bbe24947c2bfd6468fc0ff3 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit adee0da0368f +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:06 2023 +0800 + + vfio/pci: Disable INTx in vfio_realize error path + + When vfio realize fails, INTx isn't disabled if it has been enabled. + This may confuse host side with unhandled interrupt report. 
+ + Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 87bd440504..2d059832a4 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3244,6 +3244,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + return; + + out_deregister: ++ if (vdev->interrupt == VFIO_INT_INTx) { ++ vfio_intx_disable(vdev); ++ } + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + if (vdev->irqchip_change_notifier.notify) { + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch b/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch new file mode 100644 index 0000000..d05d114 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch @@ -0,0 +1,67 @@ +From 0b1ab3aacc02e70bfe8440236eb9def426bbe10e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 22/37] vfio/pci: Fix a segfault in vfio_realize +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [20/28] 48b9c1efe295c2672693d9c99f6d11738d2b98d1 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 357bd7932a13 +Author: Zhenzhong Duan +Date: Thu Jun 29 16:40:38 2023 +0800 + + vfio/pci: Fix a segfault in vfio_realize + + The kvm irqchip notifier is only registered if the device supports + INTx, however it's unconditionally removed in vfio realize error + path. 
If the assigned device does not support INTx, this will cause + QEMU to crash when vfio realize fails. Change it to conditionally + remove the notifier only if the notify hook is setup. + + Before fix: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 + Connection closed by foreign host. + + After fix: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 + Error: vfio 0000:81:11.1: xres and yres properties require display=on + (qemu) + + Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 6634945a70..d08e6c1a20 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3245,7 +3245,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + + out_deregister: + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); +- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ if (vdev->irqchip_change_notifier.notify) { ++ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ } + out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch b/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch new file mode 100644 index 0000000..1fa725f --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch @@ -0,0 +1,56 @@ +From 2437a06ff137c4bc856df096e42407c1f50b25b0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 06/37] vfio/pci: Fix a use-after-free issue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support 
+RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/28] eca69a7e0a6fb8c1c70be8b91209a53b040e30ba (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit b83b40b61484 +Author: Zhenzhong Duan +Date: Wed May 17 10:46:51 2023 +0800 + + vfio/pci: Fix a use-after-free issue + + vbasedev->name is freed wrongly which leads to garbage VFIO trace log. + Fix it by allocating a dup of vbasedev->name and then free the dup. + + Fixes: 2dca1b37a760 ("vfio/pci: add support for VF token") + Suggested-by: Alex Williamson + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Matthew Rosato + Acked-by: Alex Williamson + Reviewed-by: Philippe Mathieu-Daudé + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 6cd3a98c39..7c5e2b5996 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3018,7 +3018,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + qemu_uuid_unparse(&vdev->vf_token, uuid); + name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); + } else { +- name = vbasedev->name; ++ name = g_strdup(vbasedev->name); + } + + ret = vfio_get_device(group, name, vbasedev, errp); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch b/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch new file mode 100644 index 0000000..3978b96 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch @@ -0,0 +1,55 @@ +From 9c5016c9b3f9cf66d1b531de829e8b5010962695 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 23/37] vfio/pci: Free leaked timer in vfio_realize error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: 
Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [21/28] dbaae4e484de4613f7f7735be519b7357627326e (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0cc889c8826c +Author: Zhenzhong Duan +Date: Thu Jun 29 16:40:39 2023 +0800 + + vfio/pci: Free leaked timer in vfio_realize error path + + When vfio_realize fails, the mmap_timer used for INTx optimization + isn't freed. As this timer isn't activated yet, the potential impact + is just a piece of leaked memory. + + Fixes: ea486926b07d ("vfio-pci: Update slow path INTx algorithm timer related") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d08e6c1a20..87bd440504 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3248,6 +3248,9 @@ out_deregister: + if (vdev->irqchip_change_notifier.notify) { + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); + } ++ if (vdev->intx.mmap_timer) { ++ timer_free(vdev->intx.mmap_timer); ++ } + out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch b/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch new file mode 100644 index 0000000..d937140 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch @@ -0,0 +1,141 @@ +From db53345dba5682c3ba0bc3fc596b30a98dadb88f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 05/37] vfio/pci: Static Resizable BAR capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: 
vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/28] 42e9f4b517eb919c77c6fdbe771d9d05a91955bd (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit b5048a4cbfa0 +Author: Alex Williamson +Date: Thu May 4 14:42:48 2023 -0600 + + vfio/pci: Static Resizable BAR capability + + The PCI Resizable BAR (ReBAR) capability is currently hidden from the + VM because the protocol for interacting with the capability does not + support a mechanism for the device to reject an advertised supported + BAR size. However, when assigned to a VM, the act of resizing the + BAR requires adjustment of host resources for the device, which + absolutely can fail. Linux does not currently allow us to reserve + resources for the device independent of the current usage. + + The only writable field within the ReBAR capability is the BAR Size + register. The PCIe spec indicates that when written, the device + should immediately begin to operate with the provided BAR size. The + spec however also notes that software must only write values + corresponding to supported sizes as indicated in the capability and + control registers. Writing unsupported sizes produces undefined + results. Therefore, if the hypervisor were to virtualize the + capability and control registers such that the current size is the + only indicated available size, then a write of anything other than + the current size falls into the category of undefined behavior, + where we can essentially expose the modified ReBAR capability as + read-only. + + This may seem pointless, but users have reported that virtualizing + the capability in this way not only allows guest software to expose + related features as available (even if only cosmetic), but in some + scenarios can resolve guest driver issues. Additionally, no + regressions in behavior have been reported for this change. 
+ + A caveat here is that the PCIe spec requires for compatibility that + devices report support for a size in the range of 1MB to 512GB, + therefore if the current BAR size falls outside that range we revert + to hiding the capability. + + Reviewed-by: Cédric Le Goater + Link: https://lore.kernel.org/r/20230505232308.2869912-1-alex.williamson@redhat.com + Signed-off-by: Alex Williamson + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 53 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 579b92a6ed..6cd3a98c39 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2069,6 +2069,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) + return 0; + } + ++static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) ++{ ++ uint32_t ctrl; ++ int i, nbar; ++ ++ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); ++ nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; ++ ++ for (i = 0; i < nbar; i++) { ++ uint32_t cap; ++ int size; ++ ++ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); ++ size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT; ++ ++ /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */ ++ cap = size <= 27 ? 1U << (size + 4) : 0; ++ ++ /* ++ * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one ++ * size in the range 1MB to 512GB. We intend to mask all sizes except ++ * the one currently enabled in the size field, therefore if it's ++ * outside the range, hide the whole capability as this virtualization ++ * trick won't work. If >512GB resizable BARs start to appear, we ++ * might need an opt-in or reservation scheme in the kernel. ++ */ ++ if (!(cap & PCI_REBAR_CAP_SIZES)) { ++ return -EINVAL; ++ } ++ ++ /* Hide all sizes reported in the ctrl reg per above requirement. 
*/ ++ ctrl &= (PCI_REBAR_CTRL_BAR_SIZE | ++ PCI_REBAR_CTRL_NBAR_MASK | ++ PCI_REBAR_CTRL_BAR_IDX); ++ ++ /* ++ * The BAR size field is RW, however we've mangled the capability ++ * register such that we only report a single size, ie. the current ++ * BAR size. A write of an unsupported value is undefined, therefore ++ * the register field is essentially RO. ++ */ ++ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0); ++ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0); ++ } ++ ++ return 0; ++} ++ + static void vfio_add_ext_cap(VFIOPCIDevice *vdev) + { + PCIDevice *pdev = &vdev->pdev; +@@ -2142,9 +2190,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) + case 0: /* kernel masked capability */ + case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ + case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ +- case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */ + trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); + break; ++ case PCI_EXT_CAP_ID_REBAR: ++ if (!vfio_setup_rebar_ecap(vdev, next)) { ++ pcie_add_capability(pdev, cap_id, cap_ver, next, size); ++ } ++ break; + default: + pcie_add_capability(pdev, cap_id, cap_ver, next, size); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch b/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch new file mode 100644 index 0000000..7b40e5e --- /dev/null +++ b/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch @@ -0,0 +1,104 @@ +From 3022cc31bca5a5441e285c971eaf72b7643b9be0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 03/37] vfio/pci: add support for VF token +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/28] 
ff24284ede2806e21f4f6709d8abd4c4029b7d5c (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 2dca1b37a760 +Author: Minwoo Im +Date: Mon Mar 20 16:35:22 2023 +0900 + + vfio/pci: add support for VF token + + VF token was introduced [1] to kernel vfio-pci along with SR-IOV + support [2]. This patch adds support VF token among PF and VF(s). To + passthu PCIe VF to a VM, kernel >= v5.7 needs this. + + It can be configured with UUID like: + + -device vfio-pci,host=DDDD:BB:DD:F,vf-token=,... + + [1] https://lore.kernel.org/linux-pci/158396393244.5601.10297430724964025753.stgit@gimli.home/ + [2] https://lore.kernel.org/linux-pci/158396044753.5601.14804870681174789709.stgit@gimli.home/ + + Cc: Alex Williamson + Signed-off-by: Minwoo Im + Reviewed-by: Klaus Jensen + Link: https://lore.kernel.org/r/20230320073522epcms2p48f682ecdb73e0ae1a4850ad0712fd780@epcms2p4 + Signed-off-by: Alex Williamson + +Conflicts: + - hw/vfio/pci.c + context changes in vfio_realize () due to redhat commit 267071d16b23 + ("vfio: cap number of devices that can be assigned") + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 13 ++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index a779053be3..579b92a6ed 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2859,6 +2859,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + int groupid; + int ret, i = 0; + bool is_mdev; ++ char uuid[UUID_FMT_LEN]; ++ char *name; + + if (device_limit && device_limit != vdev->assigned_device_limit) { + error_setg(errp, "Assigned device limit has been redefined. 
" +@@ -2960,7 +2962,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + goto error; + } + +- ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); ++ if (!qemu_uuid_is_null(&vdev->vf_token)) { ++ qemu_uuid_unparse(&vdev->vf_token, uuid); ++ name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); ++ } else { ++ name = vbasedev->name; ++ } ++ ++ ret = vfio_get_device(group, name, vbasedev, errp); ++ g_free(name); + if (ret) { + vfio_put_group(group); + goto error; +@@ -3292,6 +3302,7 @@ static void vfio_instance_init(Object *obj) + + static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), ++ DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), + DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, + vbasedev.pre_copy_dirty_page_tracking, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 45235d38ba..10530743ad 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -137,6 +137,7 @@ struct VFIOPCIDevice { + VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ + void *igd_opregion; + PCIHostDeviceAddress host; ++ QemuUUID vf_token; + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); +-- +2.39.3 + diff --git a/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch b/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch deleted file mode 100644 index a7cfb2f..0000000 --- a/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,185 +0,0 @@ -From 42818e2bc6fa537fe52f7f0e6b094774a1eb00e1 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:48 +0800 -Subject: [PATCH 07/31] vhost: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa 
-RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/10] d58b439eb093f5dd3b7ca081af0ab75780e42917 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add functions to support configure interrupt. -The configure interrupt process will start in vhost_dev_start -and stop in vhost_dev_stop. - -Also add the functions to support vhost_config_pending and -vhost_config_mask. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-8-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit f9a09ca3ea69d108d828b7c82f1bd61b2df6fc96) -Signed-off-by: Cindy Lu ---- - hw/virtio/vhost.c | 78 ++++++++++++++++++++++++++++++++++++++- - include/hw/virtio/vhost.h | 4 ++ - 2 files changed, 81 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 7fb008bc9e..84dbb39e07 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -1596,7 +1596,68 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, - file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); - r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); - if (r < 0) { -- VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed"); -+ error_report("vhost_set_vring_call failed %d", -r); -+ } -+} -+ -+bool vhost_config_pending(struct vhost_dev *hdev) -+{ -+ assert(hdev->vhost_ops); -+ if ((hdev->started == false) || -+ (hdev->vhost_ops->vhost_set_config_call == NULL)) { -+ return false; -+ } -+ -+ EventNotifier *notifier = -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; -+ return event_notifier_test_and_clear(notifier); -+} -+ -+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask) -+{ -+ int fd; -+ int r; -+ EventNotifier *notifier = -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; -+ EventNotifier *config_notifier = &vdev->config_notifier; -+ 
assert(hdev->vhost_ops); -+ -+ if ((hdev->started == false) || -+ (hdev->vhost_ops->vhost_set_config_call == NULL)) { -+ return; -+ } -+ if (mask) { -+ assert(vdev->use_guest_notifier_mask); -+ fd = event_notifier_get_fd(notifier); -+ } else { -+ fd = event_notifier_get_fd(config_notifier); -+ } -+ r = hdev->vhost_ops->vhost_set_config_call(hdev, fd); -+ if (r < 0) { -+ error_report("vhost_set_config_call failed %d", -r); -+ } -+} -+ -+static void vhost_stop_config_intr(struct vhost_dev *dev) -+{ -+ int fd = -1; -+ assert(dev->vhost_ops); -+ if (dev->vhost_ops->vhost_set_config_call) { -+ dev->vhost_ops->vhost_set_config_call(dev, fd); -+ } -+} -+ -+static void vhost_start_config_intr(struct vhost_dev *dev) -+{ -+ int r; -+ -+ assert(dev->vhost_ops); -+ int fd = event_notifier_get_fd(&dev->vdev->config_notifier); -+ if (dev->vhost_ops->vhost_set_config_call) { -+ r = dev->vhost_ops->vhost_set_config_call(dev, fd); -+ if (!r) { -+ event_notifier_set(&dev->vdev->config_notifier); -+ } - } - } - -@@ -1836,6 +1897,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - } - } - -+ r = event_notifier_init( -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); -+ if (r < 0) { -+ return r; -+ } -+ event_notifier_test_and_clear( -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); -+ if (!vdev->use_guest_notifier_mask) { -+ vhost_config_mask(hdev, vdev, true); -+ } - if (hdev->log_enabled) { - uint64_t log_base; - -@@ -1874,6 +1945,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - vhost_device_iotlb_miss(hdev, vq->used_phys, true); - } - } -+ vhost_start_config_intr(hdev); - return 0; - fail_start: - if (vrings) { -@@ -1903,6 +1975,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - - /* should only be called after backend is connected */ - assert(hdev->vhost_ops); -+ event_notifier_test_and_clear( -+ 
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); -+ event_notifier_test_and_clear(&vdev->config_notifier); - - trace_vhost_dev_stop(hdev, vdev->name, vrings); - -@@ -1925,6 +2000,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - } - memory_listener_unregister(&hdev->iommu_listener); - } -+ vhost_stop_config_intr(hdev); - vhost_log_put(hdev, true); - hdev->started = false; - vdev->vhost_started = false; -diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h -index 67a6807fac..05bedb2416 100644 ---- a/include/hw/virtio/vhost.h -+++ b/include/hw/virtio/vhost.h -@@ -33,6 +33,7 @@ struct vhost_virtqueue { - unsigned used_size; - EventNotifier masked_notifier; - EventNotifier error_notifier; -+ EventNotifier masked_config_notifier; - struct vhost_dev *dev; - }; - -@@ -41,6 +42,7 @@ typedef unsigned long vhost_log_chunk_t; - #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) - #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) - #define VHOST_INVALID_FEATURE_BIT (0xff) -+#define VHOST_QUEUE_NUM_CONFIG_INR 0 - - struct vhost_log { - unsigned long long size; -@@ -168,6 +170,8 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); - * Disable direct notifications to vhost device. 
- */ - void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); -+bool vhost_config_pending(struct vhost_dev *hdev); -+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask); - - /** - * vhost_dev_is_started() - report status of vhost device --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch b/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch deleted file mode 100644 index 940133b..0000000 --- a/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch +++ /dev/null @@ -1,171 +0,0 @@ -From bffccbd59a2e2c641810cd7362c7b5ecf5989ed8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:35 +0100 -Subject: [PATCH 03/14] vhost: allocate SVQ device file descriptors at device - start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/13] bab2d43f0fc0d13a4917e706244b37e1a431b082 (eperezmartin/qemu-kvm) - -The next patches will start control SVQ if possible. However, we don't -know if that will be possible at qemu boot anymore. - -Delay device file descriptors until we know it at device start. This -will avoid to create them if the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 3cfb4d069cd2977b707fb519c455d7d416e1f4b0) ---- - hw/virtio/vhost-shadow-virtqueue.c | 31 ++------------------------ - hw/virtio/vhost-vdpa.c | 35 ++++++++++++++++++++++++------ - 2 files changed, 30 insertions(+), 36 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 264ddc166d..3b05bab44d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -715,43 +715,18 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - * @iova_tree: Tree to perform descriptors translations - * @ops: SVQ owner callbacks - * @ops_opaque: ops opaque pointer -- * -- * Returns the new virtqueue or NULL. -- * -- * In case of error, reason is reported through error_report. - */ - VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, - const VhostShadowVirtqueueOps *ops, - void *ops_opaque) - { -- g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); -- int r; -- -- r = event_notifier_init(&svq->hdev_kick, 0); -- if (r != 0) { -- error_report("Couldn't create kick event notifier: %s (%d)", -- g_strerror(errno), errno); -- goto err_init_hdev_kick; -- } -- -- r = event_notifier_init(&svq->hdev_call, 0); -- if (r != 0) { -- error_report("Couldn't create call event notifier: %s (%d)", -- g_strerror(errno), errno); -- goto err_init_hdev_call; -- } -+ VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); - svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; -- return g_steal_pointer(&svq); -- --err_init_hdev_call: -- event_notifier_cleanup(&svq->hdev_kick); -- --err_init_hdev_kick: -- return NULL; -+ return svq; - } - - /** -@@ -763,7 +738,5 @@ void vhost_svq_free(gpointer pvq) - { - VhostShadowVirtqueue *vq = pvq; - vhost_svq_stop(vq); -- event_notifier_cleanup(&vq->hdev_kick); -- event_notifier_cleanup(&vq->hdev_call); - g_free(vq); - } -diff --git 
a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 44e6a9b7b3..530d2ca362 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -428,15 +428,11 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - - shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); - for (unsigned n = 0; n < hdev->nvqs; ++n) { -- g_autoptr(VhostShadowVirtqueue) svq; -+ VhostShadowVirtqueue *svq; - - svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, - v->shadow_vq_ops_opaque); -- if (unlikely(!svq)) { -- error_setg(errp, "Cannot create svq %u", n); -- return -1; -- } -- g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq)); -+ g_ptr_array_add(shadow_vqs, svq); - } - - v->shadow_vqs = g_steal_pointer(&shadow_vqs); -@@ -871,11 +867,23 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - const EventNotifier *event_notifier = &svq->hdev_kick; - int r; - -+ r = event_notifier_init(&svq->hdev_kick, 0); -+ if (r != 0) { -+ error_setg_errno(errp, -r, "Couldn't create kick event notifier"); -+ goto err_init_hdev_kick; -+ } -+ -+ r = event_notifier_init(&svq->hdev_call, 0); -+ if (r != 0) { -+ error_setg_errno(errp, -r, "Couldn't create call event notifier"); -+ goto err_init_hdev_call; -+ } -+ - file.fd = event_notifier_get_fd(event_notifier); - r = vhost_vdpa_set_vring_dev_kick(dev, &file); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Can't set device kick fd"); -- return r; -+ goto err_init_set_dev_fd; - } - - event_notifier = &svq->hdev_call; -@@ -883,8 +891,18 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - r = vhost_vdpa_set_vring_dev_call(dev, &file); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Can't set device call fd"); -+ goto err_init_set_dev_fd; - } - -+ return 0; -+ -+err_init_set_dev_fd: -+ event_notifier_set_handler(&svq->hdev_call, NULL); -+ -+err_init_hdev_call: -+ event_notifier_cleanup(&svq->hdev_kick); -+ -+err_init_hdev_kick: - return r; - } - -@@ -1096,6 +1114,9 @@ static void 
vhost_vdpa_svqs_stop(struct vhost_dev *dev) - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - vhost_vdpa_svq_unmap_rings(dev, svq); -+ -+ event_notifier_cleanup(&svq->hdev_kick); -+ event_notifier_cleanup(&svq->hdev_call); - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch b/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch new file mode 100644 index 0000000..3282c24 --- /dev/null +++ b/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch @@ -0,0 +1,138 @@ +From ac54f5f746782da89ab674733af5622e524b58eb Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 2 Jun 2023 18:27:35 +0200 +Subject: [PATCH 4/6] vhost: fix vhost_dev_enable_notifiers() error case +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 176: vhost: fix vhost_dev_enable_notifiers() error case +RH-Jira: RHEL-330 +RH-Acked-by: MST +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang +RH-Commit: [1/1] fd30d7501be59f7e5b9d6fc5ed84efcc4037d08e (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-330 + +in vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true) +fails, we call vhost_dev_disable_notifiers() that executes +virtio_bus_set_host_notifier(false) on all queues, even on queues that +have failed to be initialized. + +This triggers a core dump in memory_region_del_eventfd(): + + virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24) + vhost VQ 1 notifier binding failed: 24 + .../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. + +Fix the problem by providing to vhost_dev_disable_notifiers() the +number of queues to disable. 
+ +Fixes: 8771589b6f81 ("vhost: simplify vhost_dev_enable_notifiers") +Cc: longpeng2@huawei.com +Signed-off-by: Laurent Vivier +Message-Id: <20230602162735.3670785-1-lvivier@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit 92099aa4e9a3bb6856c290afaf41c76f9e3dd9fd) +--- + hw/virtio/vhost.c | 65 ++++++++++++++++++++++++++--------------------- + 1 file changed, 36 insertions(+), 29 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index a266396576..ae0a033e60 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -1545,6 +1545,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) + memset(hdev, 0, sizeof(struct vhost_dev)); + } + ++static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, ++ VirtIODevice *vdev, ++ unsigned int nvqs) ++{ ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ int i, r; ++ ++ /* ++ * Batch all the host notifiers in a single transaction to avoid ++ * quadratic time complexity in address_space_update_ioeventfds(). ++ */ ++ memory_region_transaction_begin(); ++ ++ for (i = 0; i < nvqs; ++i) { ++ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, ++ false); ++ if (r < 0) { ++ error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); ++ } ++ assert(r >= 0); ++ } ++ ++ /* ++ * The transaction expects the ioeventfds to be open when it ++ * commits. Do it now, before the cleanup loop. ++ */ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; ++i) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); ++ } ++ virtio_device_release_ioeventfd(vdev); ++} ++ + /* Stop processing guest IO notifications in qemu. + * Start processing them in vhost in kernel. 
+ */ +@@ -1574,7 +1608,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + if (r < 0) { + error_report("vhost VQ %d notifier binding failed: %d", i, -r); + memory_region_transaction_commit(); +- vhost_dev_disable_notifiers(hdev, vdev); ++ vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); + return r; + } + } +@@ -1591,34 +1625,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + */ + void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + { +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +- int i, r; +- +- /* +- * Batch all the host notifiers in a single transaction to avoid +- * quadratic time complexity in address_space_update_ioeventfds(). +- */ +- memory_region_transaction_begin(); +- +- for (i = 0; i < hdev->nvqs; ++i) { +- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, +- false); +- if (r < 0) { +- error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); +- } +- assert (r >= 0); +- } +- +- /* +- * The transaction expects the ioeventfds to be open when it +- * commits. Do it now, before the cleanup loop. +- */ +- memory_region_transaction_commit(); +- +- for (i = 0; i < hdev->nvqs; ++i) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); +- } +- virtio_device_release_ioeventfd(vdev); ++ vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); + } + + /* Test and clear event pending status. 
+-- +2.39.3 + diff --git a/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch b/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch deleted file mode 100644 index ca93785..0000000 --- a/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 55aad90e347599e88747888ddbefcba33427f386 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Fri, 16 Dec 2022 11:35:52 +0800 -Subject: [PATCH 12/31] vhost: fix vq dirty bitmap syncing when vIOMMU is - enabled - -RH-Author: Eric Auger -RH-MergeRequest: 134: vhost: fix vq dirty bitmap syncing when vIOMMU is enabled -RH-Bugzilla: 2124856 -RH-Acked-by: Peter Xu -RH-Acked-by: Jason Wang -RH-Acked-by: Laurent Vivier -RH-Commit: [1/1] 57ef499b63dc2cca6e64ee84d1dc127635868ca2 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124856 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=49989924 -Upstream: yes - -When vIOMMU is enabled, the vq->used_phys is actually the IOVA not -GPA. So we need to translate it to GPA before the syncing otherwise we -may hit the following crash since IOVA could be out of the scope of -the GPA log size. This could be noted when using virtio-IOMMU with -vhost using 1G memory. - -Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support") -Cc: qemu-stable@nongnu.org -Tested-by: Lei Yang -Reported-by: Yalan Zhang -Signed-off-by: Jason Wang -Message-Id: <20221216033552.77087-1-jasowang@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 345cc1cbcbce2bab00abc2b88338d7d89c702d6b) -Signed-off-by: Eric Auger ---- - hw/virtio/vhost.c | 84 ++++++++++++++++++++++++++++++++++++----------- - 1 file changed, 64 insertions(+), 20 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 84dbb39e07..2c566dc539 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -20,6 +20,7 @@ - #include "qemu/range.h" - #include "qemu/error-report.h" - #include "qemu/memfd.h" -+#include "qemu/log.h" - #include "standard-headers/linux/vhost_types.h" - #include "hw/virtio/virtio-bus.h" - #include "hw/virtio/virtio-access.h" -@@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev, - } - } - -+static bool vhost_dev_has_iommu(struct vhost_dev *dev) -+{ -+ VirtIODevice *vdev = dev->vdev; -+ -+ /* -+ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support -+ * incremental memory mapping API via IOTLB API. For platform that -+ * does not have IOMMU, there's no need to enable this feature -+ * which may cause unnecessary IOTLB miss/update transactions. 
-+ */ -+ if (vdev) { -+ return virtio_bus_device_iommu_enabled(vdev) && -+ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); -+ } else { -+ return false; -+ } -+} -+ - static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, - MemoryRegionSection *section, - hwaddr first, -@@ -137,8 +156,51 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, - continue; - } - -- vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys, -- range_get_last(vq->used_phys, vq->used_size)); -+ if (vhost_dev_has_iommu(dev)) { -+ IOMMUTLBEntry iotlb; -+ hwaddr used_phys = vq->used_phys, used_size = vq->used_size; -+ hwaddr phys, s, offset; -+ -+ while (used_size) { -+ rcu_read_lock(); -+ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, -+ used_phys, -+ true, -+ MEMTXATTRS_UNSPECIFIED); -+ rcu_read_unlock(); -+ -+ if (!iotlb.target_as) { -+ qemu_log_mask(LOG_GUEST_ERROR, "translation " -+ "failure for used_iova %"PRIx64"\n", -+ used_phys); -+ return -EINVAL; -+ } -+ -+ offset = used_phys & iotlb.addr_mask; -+ phys = iotlb.translated_addr + offset; -+ -+ /* -+ * Distance from start of used ring until last byte of -+ * IOMMU page. -+ */ -+ s = iotlb.addr_mask - offset; -+ /* -+ * Size of used ring, or of the part of it until end -+ * of IOMMU page. To avoid zero result, do the adding -+ * outside of MIN(). 
-+ */ -+ s = MIN(s, used_size - 1) + 1; -+ -+ vhost_dev_sync_region(dev, section, start_addr, end_addr, phys, -+ range_get_last(phys, s)); -+ used_size -= s; -+ used_phys += s; -+ } -+ } else { -+ vhost_dev_sync_region(dev, section, start_addr, -+ end_addr, vq->used_phys, -+ range_get_last(vq->used_phys, vq->used_size)); -+ } - } - return 0; - } -@@ -306,24 +368,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) - dev->log_size = size; - } - --static bool vhost_dev_has_iommu(struct vhost_dev *dev) --{ -- VirtIODevice *vdev = dev->vdev; -- -- /* -- * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support -- * incremental memory mapping API via IOTLB API. For platform that -- * does not have IOMMU, there's no need to enable this feature -- * which may cause unnecessary IOTLB miss/update transactions. -- */ -- if (vdev) { -- return virtio_bus_device_iommu_enabled(vdev) && -- virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); -- } else { -- return false; -- } --} -- - static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, - hwaddr *plen, bool is_write) - { --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch b/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch deleted file mode 100644 index 1b48f5d..0000000 --- a/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch +++ /dev/null @@ -1,56 +0,0 @@ -From d135303da1187d9f214e520a977fe7c47e5ce1f0 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:45 +0800 -Subject: [PATCH 04/31] vhost: introduce new VhostOps vhost_set_config_call -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/10] 
c2492838d9c1415e42d2507f2956d640a30325f2 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -This patch introduces new VhostOps vhost_set_config_call. -This function allows the qemu to set the config -event fd to kernel driver. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-5-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9b30cdf9bbf9524a4f4f8a6eb551eb13cbbd3893) -Signed-off-by: Cindy Lu ---- - include/hw/virtio/vhost-backend.h | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h -index eab46d7f0b..c5ab49051e 100644 ---- a/include/hw/virtio/vhost-backend.h -+++ b/include/hw/virtio/vhost-backend.h -@@ -128,6 +128,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); - - typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); - -+typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev, -+ int fd); - typedef struct VhostOps { - VhostBackendType backend_type; - vhost_backend_init vhost_backend_init; -@@ -174,6 +176,7 @@ typedef struct VhostOps { - vhost_vq_get_addr_op vhost_vq_get_addr; - vhost_get_device_id_op vhost_get_device_id; - vhost_force_iommu_op vhost_force_iommu; -+ vhost_set_config_call_op vhost_set_config_call; - } VhostOps; - - int vhost_backend_update_device_iotlb(struct vhost_dev *dev, --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch b/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch deleted file mode 100644 index de005ba..0000000 --- a/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 6584478deca49d0ea20add588e4fdb51cdc26f1d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:36 +0100 -Subject: [PATCH 04/14] vhost: move iova_tree set to vhost_svq_start -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/13] 200d8e9b58e258a6e301430debc73ef7d962b732 (eperezmartin/qemu-kvm) - -Since we don't know if we will use SVQ at qemu initialization, let's -allocate iova_tree only if needed. To do so, accept it at SVQ start, not -at initialization. - -This will avoid to create it if the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 5fde952bbdd521c10fc018ee04f922a7dca5f663) ---- - hw/virtio/vhost-shadow-virtqueue.c | 9 ++++----- - hw/virtio/vhost-shadow-virtqueue.h | 5 ++--- - hw/virtio/vhost-vdpa.c | 5 ++--- - 3 files changed, 8 insertions(+), 11 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3b05bab44d..4307296358 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -642,9 +642,10 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) - * @svq: Shadow Virtqueue - * @vdev: VirtIO device - * @vq: Virtqueue to shadow -+ * @iova_tree: Tree to perform descriptors translations - */ - void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, -- VirtQueue *vq) -+ VirtQueue *vq, VhostIOVATree *iova_tree) - { - size_t desc_size, driver_size, device_size; - -@@ -655,6 +656,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - svq->last_used_idx = 0; - svq->vdev = vdev; - svq->vq = vq; -+ svq->iova_tree = iova_tree; - - svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); - driver_size = vhost_svq_driver_area_size(svq); -@@ -712,18 +714,15 @@ void 
vhost_svq_stop(VhostShadowVirtqueue *svq) - * Creates vhost shadow virtqueue, and instructs the vhost device to use the - * shadow methods and file descriptors. - * -- * @iova_tree: Tree to perform descriptors translations - * @ops: SVQ owner callbacks - * @ops_opaque: ops opaque pointer - */ --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -- const VhostShadowVirtqueueOps *ops, -+VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, - void *ops_opaque) - { - VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); -- svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; - return svq; -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index d04c34a589..926a4897b1 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -126,11 +126,10 @@ size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq); - size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq); - - void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, -- VirtQueue *vq); -+ VirtQueue *vq, VhostIOVATree *iova_tree); - void vhost_svq_stop(VhostShadowVirtqueue *svq); - --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -- const VhostShadowVirtqueueOps *ops, -+VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, - void *ops_opaque); - - void vhost_svq_free(gpointer vq); -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 530d2ca362..e65603022f 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -430,8 +430,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - for (unsigned n = 0; n < hdev->nvqs; ++n) { - VhostShadowVirtqueue *svq; - -- svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, -- v->shadow_vq_ops_opaque); -+ svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque); - 
g_ptr_array_add(shadow_vqs, svq); - } - -@@ -1070,7 +1069,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - goto err; - } - -- vhost_svq_start(svq, dev->vdev, vq); -+ vhost_svq_start(svq, dev->vdev, vq, v->iova_tree); - ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); - if (unlikely(!ok)) { - goto err_map; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch b/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch deleted file mode 100644 index 099dd73..0000000 --- a/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 2906f8df3c5e915a3dc05a705b87990211f114b5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:34 +0100 -Subject: [PATCH 02/14] vhost: set SVQ device call handler at SVQ start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/13] ad90a6cc5c71b70d705904433d5a986e8fedb924 (eperezmartin/qemu-kvm) - -By the end of this series CVQ is shadowed as long as the features -support it. - -Since we don't know at the beginning of qemu running if this is -supported, move the event notifier handler setting to the start of the -SVQ, instead of the start of qemu run. This will avoid to create them if -the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 20e7412bfd63c68f1798fbdb799aedb7e05fee88) ---- - hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 5bd14cad96..264ddc166d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -648,6 +648,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - { - size_t desc_size, driver_size, device_size; - -+ event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->next_guest_avail_elem = NULL; - svq->shadow_avail_idx = 0; - svq->shadow_used_idx = 0; -@@ -704,6 +705,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - g_free(svq->desc_state); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); -+ event_notifier_set_handler(&svq->hdev_call, NULL); - } - - /** -@@ -740,7 +742,6 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, - } - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); -- event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; -@@ -763,7 +764,6 @@ void vhost_svq_free(gpointer pvq) - VhostShadowVirtqueue *vq = pvq; - vhost_svq_stop(vq); - event_notifier_cleanup(&vq->hdev_kick); -- event_notifier_set_handler(&vq->hdev_call, NULL); - event_notifier_cleanup(&vq->hdev_call); - g_free(vq); - } --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch b/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch deleted file mode 100644 index 88d4df6..0000000 --- a/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch +++ /dev/null @@ -1,73 +0,0 @@ -From e01563a8de9a45937ffd8d4c1d74a6890ffb6eb6 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:46 +0800 -Subject: [PATCH 05/31] vhost-vdpa: add support for config interrupt -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/10] 49bfd214a503f8e199ff93f4bbfcbd4c4f2405b5 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add new call back function in vhost-vdpa, The function -vhost_set_config_call can set the event fd to kernel. -This function will be called in the vhost_dev_start -and vhost_dev_stop - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-6-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 259f3acc1c675dd77ebbdb28a483f5d0220bdbf6) -Signed-off-by: Cindy Lu ---- - hw/virtio/trace-events | 1 + - hw/virtio/vhost-vdpa.c | 8 ++++++++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 14fc5b9bb2..46f2faf04e 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -62,6 +62,7 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI - vhost_vdpa_set_owner(void *dev) "dev: %p" - vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 - vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64 -+vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d" - - # virtio.c - virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 7468e44b87..c5be2645b0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -754,6 +754,13 
@@ static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) - return 0; - } - -+static int vhost_vdpa_set_config_call(struct vhost_dev *dev, -+ int fd) -+{ -+ trace_vhost_vdpa_set_config_call(dev, fd); -+ return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd); -+} -+ - static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, - uint32_t config_len) - { -@@ -1310,4 +1317,5 @@ const VhostOps vdpa_ops = { - .vhost_get_device_id = vhost_vdpa_get_device_id, - .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, - .vhost_force_iommu = vhost_vdpa_force_iommu, -+ .vhost_set_config_call = vhost_vdpa_set_config_call, - }; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch index 0c82680..fd29eb7 100644 --- a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +++ b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch @@ -1,14 +1,15 @@ -From cca66d3e5f7bc1d88d79a7653ae244ba31566ee8 Mon Sep 17 00:00:00 2001 +From 4e30ca551fb3740a428017a0debf0a6aab976639 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 19 Jun 2023 12:22:09 +0530 -Subject: [PATCH 2/2] vhost-vdpa: do not cleanup the vdpa/vhost-net structures +Subject: [PATCH 6/6] vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present RH-Author: Ani Sinha -RH-MergeRequest: 294: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present -RH-Bugzilla: 2227721 +RH-MergeRequest: 174: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present +RH-Bugzilla: 2128929 +RH-Acked-by: Igor Mammedov RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] af8fa659afb3d8a2e38bb745b31d8cd665a1fc77 +RH-Commit: [1/1] c70d4e5fd93256326d318e0b507db6b9eb93ad86 (anisinha/centos-qemu-kvm) When a peer nic is still attached to the vdpa backend, it is too early to free up the vhost-net and vdpa structures. 
If these structures are freed here, then @@ -38,20 +39,17 @@ Message-Id: <20230619065209.442185-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin (cherry picked from commit a0d7215e339b61c7d7a7b3fcf754954d80d93eb8) -Signed-off-by: Michael Tokarev -(Mjt: context change for stable-7.2) -(cherry picked from commit 3d12598b74ed4bcc6db8b50818a95c4b770d4487) --- net/vhost-vdpa.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 7d9c4ea09d..1b4fec59a2 100644 +index 99904a0da7..8c8900f0f4 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c -@@ -180,6 +180,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) +@@ -184,6 +184,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_dev *dev = &s->vhost_net->dev; + /* + * If a peer NIC is attached, do not cleanup anything. @@ -63,7 +61,7 @@ index 7d9c4ea09d..1b4fec59a2 100644 + } qemu_vfree(s->cvq_cmd_out_buffer); qemu_vfree(s->status); - if (dev->vq_index + dev->nvqs == dev->vq_index_end) { + if (s->vhost_net) { -- 2.39.3 diff --git a/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch b/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch new file mode 100644 index 0000000..3711949 --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch @@ -0,0 +1,86 @@ +From 3b51a7b84ea21360c6d551284aecb8b6f371e888 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 4 Jul 2023 09:19:31 +0200 +Subject: [PATCH 9/9] vhost-vdpa: mute unaligned memory error report +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 193: vhost-vdpa: mute unaligned memory error report +RH-Bugzilla: 2141965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eugenio Pérez +RH-Commit: [1/1] 60f5385d41269ce9310e1e8e0a2f1106e3a16ada (lvivier/qemu-kvm-centos) + +BZ: 
https://bugzilla.redhat.com/show_bug.cgi?id=2141965 + +With TPM CRM device, vhost-vdpa reports an error when it tries +to register a listener for a non aligned memory region: + + qemu-system-x86_64: vhost_vdpa_listener_region_add received unaligned region + qemu-system-x86_64: vhost_vdpa_listener_region_del received unaligned region + +This error can be confusing for the user whereas we only need to skip +the region (as it's already done after the error_report()) + +Rather than introducing a special case for TPM CRB memory section +to not display the message in this case, simply replace the +error_report() by a trace function (with more information, like the +memory region name). + +Signed-off-by: Laurent Vivier +Message-Id: <20230704071931.575888-2-lvivier@redhat.com> +Reviewed-by: David Hildenbrand +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 77812aa7b1fdf8f547c35a7f9a4eb1cbf3a073db) +--- + hw/virtio/trace-events | 2 ++ + hw/virtio/vhost-vdpa.c | 8 ++++++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 68b752e304..300dec8d3e 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -34,7 +34,9 @@ vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_ + vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 + vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 + vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 ++vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s 
offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 + vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" ++vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 + vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 + vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 + vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index bc6bad23d5..c04f14420d 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -202,7 +202,9 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { +- error_report("%s received unaligned region", __func__); ++ trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, ++ section->offset_within_address_space & ~TARGET_PAGE_MASK, ++ section->offset_within_region & ~TARGET_PAGE_MASK); + return; + } + +@@ -281,7 +283,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { +- error_report("%s received unaligned region", __func__); ++ trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, ++ section->offset_within_address_space & ~TARGET_PAGE_MASK, ++ section->offset_within_region & ~TARGET_PAGE_MASK); + return; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch deleted 
file mode 100644 index 02f4666..0000000 --- a/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,115 +0,0 @@ -From e04c76339580effae41617b690b58a6605e0f40b Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:47 +0800 -Subject: [PATCH 06/31] virtio: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/10] 7048eb488b732578686d451684babaf17b582b05 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add the functions to support the configure interrupt in virtio -The function virtio_config_guest_notifier_read will notify the -guest if there is an configure interrupt. -The function virtio_config_set_guest_notifier_fd_handler is -to set the fd hander for the notifier - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-7-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 7d847d0c9b93b91160f40d69a65c904d76f1edd8) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio.c | 29 +++++++++++++++++++++++++++++ - include/hw/virtio/virtio.h | 4 ++++ - 2 files changed, 33 insertions(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index eb6347ab5d..34e9c5d141 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -4012,7 +4012,14 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n) - virtio_irq(vq); - } - } -+static void virtio_config_guest_notifier_read(EventNotifier *n) -+{ -+ VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier); - -+ if (event_notifier_test_and_clear(n)) { -+ virtio_notify_config(vdev); -+ } -+} - void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, - bool with_irqfd) - { -@@ -4029,6 +4036,23 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, - } - } - -+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, -+ bool assign, bool with_irqfd) -+{ -+ EventNotifier *n; -+ n = &vdev->config_notifier; -+ if (assign && !with_irqfd) { -+ event_notifier_set_handler(n, virtio_config_guest_notifier_read); -+ } else { -+ event_notifier_set_handler(n, NULL); -+ } -+ if (!assign) { -+ /* Test and clear notifier before closing it,*/ -+ /* in case poll callback didn't have time to run. 
*/ -+ virtio_config_guest_notifier_read(n); -+ } -+} -+ - EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) - { - return &vq->guest_notifier; -@@ -4109,6 +4133,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) - return &vq->host_notifier; - } - -+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev) -+{ -+ return &vdev->config_notifier; -+} -+ - void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) - { - vq->host_notifier_enabled = enabled; -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index 1f4a41b958..9c3a4642f2 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -138,6 +138,7 @@ struct VirtIODevice - AddressSpace *dma_as; - QLIST_HEAD(, VirtQueue) *vector_queues; - QTAILQ_ENTRY(VirtIODevice) next; -+ EventNotifier config_notifier; - }; - - struct VirtioDeviceClass { -@@ -360,6 +361,9 @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); - VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); - VirtQueue *virtio_vector_next_queue(VirtQueue *vq); -+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev); -+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, -+ bool assign, bool with_irqfd); - - static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) - { --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch b/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch deleted file mode 100644 index ea2589a..0000000 --- a/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch +++ /dev/null @@ -1,262 +0,0 @@ -From 34a267758cf016f34b327318500efdbf0f606033 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:42 +0800 -Subject: [PATCH 01/31] virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/10] f374aaae221bc5a4c2521a267d21350b812e11ba (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -To support configure interrupt for vhost-vdpa -Introduce VIRTIO_CONFIG_IRQ_IDX -1 as configure interrupt's queue index, -Then we can reuse the functions guest_notifier_mask and guest_notifier_pending. -Add the check of queue index in these drivers, if the driver does not support -configure interrupt, the function will just return - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-2-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 544f0278afcab2bebab61b14e4c2c58e65911f5b) -Signed-off-by: Cindy Lu ---- - hw/display/vhost-user-gpu.c | 18 ++++++++++++++++++ - hw/net/virtio-net.c | 22 ++++++++++++++++++++-- - hw/virtio/vhost-user-fs.c | 18 ++++++++++++++++++ - hw/virtio/vhost-user-gpio.c | 10 ++++++++++ - hw/virtio/vhost-vsock-common.c | 18 ++++++++++++++++++ - hw/virtio/virtio-crypto.c | 18 ++++++++++++++++++ - include/hw/virtio/virtio.h | 3 +++ - 7 files changed, 105 insertions(+), 2 deletions(-) - -diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c -index 19c0e20103..4380a5e672 100644 ---- a/hw/display/vhost-user-gpu.c -+++ b/hw/display/vhost-user-gpu.c -@@ -486,6 +486,15 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx) - { - VhostUserGPU *g = VHOST_USER_GPU(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } 
- return vhost_virtqueue_pending(&g->vhost->dev, idx); - } - -@@ -494,6 +503,15 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) - { - VhostUserGPU *g = VHOST_USER_GPU(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask); - } - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index aba12759d5..bee35d6f9f 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3316,6 +3316,15 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) - } else { - nc = qemu_get_subqueue(n->nic, vq2q(idx)); - } -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return false -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); - } - -@@ -3339,8 +3348,17 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, - } else { - nc = qemu_get_subqueue(n->nic, vq2q(idx)); - } -- vhost_net_virtqueue_mask(get_vhost_net(nc->peer), -- vdev, idx, mask); -+ /* -+ *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } -+ -+ vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); - } - - static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index d97b179e6f..f5049735ac 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -159,6 
+159,15 @@ static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx, - { - VHostUserFS *fs = VHOST_USER_FS(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask); - } - -@@ -166,6 +175,15 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx) - { - VHostUserFS *fs = VHOST_USER_FS(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_virtqueue_pending(&fs->vhost_dev, idx); - } - -diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c -index b7b82a1099..fe3da32c74 100644 ---- a/hw/virtio/vhost-user-gpio.c -+++ b/hw/virtio/vhost-user-gpio.c -@@ -191,6 +191,16 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) - { - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } -+ - vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); - } - -diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c -index d21c72b401..d2b5519d5a 100644 ---- a/hw/virtio/vhost-vsock-common.c -+++ b/hw/virtio/vhost-vsock-common.c -@@ -127,6 +127,15 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, - { - VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the 
Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); - } - -@@ -135,6 +144,15 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, - { - VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_virtqueue_pending(&vvc->vhost_dev, idx); - } - -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index 97da74e719..516425e26a 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -1182,6 +1182,15 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, - - assert(vcrypto->vhost_started); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - cryptodev_vhost_virtqueue_mask(vdev, queue, idx, mask); - } - -@@ -1192,6 +1201,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) - - assert(vcrypto->vhost_started); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return cryptodev_vhost_virtqueue_pending(vdev, queue, idx); - } - -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index acfd4df125..1f4a41b958 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -79,6 +79,9 @@ typedef 
struct VirtQueueElement - - #define VIRTIO_NO_VECTOR 0xffff - -+/* special index value used internally for config irqs */ -+#define VIRTIO_CONFIG_IRQ_IDX -1 -+ - #define TYPE_VIRTIO_DEVICE "virtio-device" - OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE) - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch b/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch new file mode 100644 index 0000000..acfb3ae --- /dev/null +++ b/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch @@ -0,0 +1,151 @@ +From 08c8af80dbd03b46a6a8397ef0c41cda3e6de22c Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jul 2023 18:51:17 +0200 +Subject: [PATCH 01/37] virtio-iommu: Fix 64kB host page size VFIO device + assignment + +RH-Author: Eric Auger +RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes +RH-Bugzilla: 2211609 2211634 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Commit: [1/2] b48db1c964559505dda4c6c9a3b79d68207b25eb (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211634 + +When running on a 64kB page size host and protecting a VFIO device +with the virtio-iommu, qemu crashes with this kind of message: + +qemu-kvm: virtio-iommu page mask 0xfffffffffffff000 is incompatible +with mask 0x20010000 +qemu: hardware error: vfio: DMA mapping failed, unable to continue + +This is due to the fact the IOMMU MR corresponding to the VFIO device +is enabled very late on domain attach, after the machine init. +The device reports a minimal 64kB page size but it is too late to be +applied. virtio_iommu_set_page_size_mask() fails and this causes +vfio_listener_region_add() to end up with hw_error(); + +To work around this issue, we transiently enable the IOMMU MR on +machine init to collect the page size requirements and then restore +the bypass state. 
+ +Fixes: 90519b9053 ("virtio-iommu: Add bypass mode support to assigned device") +Signed-off-by: Eric Auger + +Message-Id: <20230705165118.28194-2-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Jean-Philippe Brucker +Tested-by: Jean-Philippe Brucker +Reviewed-by: Zhenzhong Duan +(cherry picked from commit 94df5b2180d61fb2ee2b04cc007981e58b6479a9) +Signed-off-by: Eric Auger +--- + hw/virtio/trace-events | 1 + + hw/virtio/virtio-iommu.c | 31 +++++++++++++++++++++++++++++-- + include/hw/virtio/virtio-iommu.h | 2 ++ + 3 files changed, 32 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 8f8d05cf9b..68b752e304 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -131,6 +131,7 @@ virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "m + virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" + virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" + virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" ++virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 + + # virtio-mem.c + virtio_mem_send_response(uint16_t type) "type=%" PRIu16 +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 1cd258135d..542679b321 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -24,6 +24,7 @@ + #include "hw/virtio/virtio.h" + #include "sysemu/kvm.h" + #include "sysemu/reset.h" ++#include "sysemu/sysemu.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "trace.h" +@@ -1106,12 +1107,12 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + } + + /* +- * After the machine is finalized, we can't change the mask anymore. If by ++ * Once the granule is frozen we can't change the mask anymore. 
If by + * chance the hotplugged device supports the same granule, we can still + * accept it. Having a different masks is possible but the guest will use + * sub-optimal block sizes, so warn about it. + */ +- if (phase_check(PHASE_MACHINE_READY)) { ++ if (s->granule_frozen) { + int new_granule = ctz64(new_mask); + int cur_granule = ctz64(cur_mask); + +@@ -1146,6 +1147,28 @@ static void virtio_iommu_system_reset(void *opaque) + + } + ++static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) ++{ ++ VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); ++ int granule; ++ ++ if (likely(s->config.bypass)) { ++ /* ++ * Transient IOMMU MR enable to collect page_size_mask requirements ++ * through memory_region_iommu_set_page_size_mask() called by ++ * VFIO region_add() callback ++ */ ++ s->config.bypass = false; ++ virtio_iommu_switch_address_space_all(s); ++ /* restore default */ ++ s->config.bypass = true; ++ virtio_iommu_switch_address_space_all(s); ++ } ++ s->granule_frozen = true; ++ granule = ctz64(s->config.page_size_mask); ++ trace_virtio_iommu_freeze_granule(BIT(granule)); ++} ++ + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); +@@ -1189,6 +1212,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); + } + ++ s->machine_done.notify = virtio_iommu_freeze_granule; ++ qemu_add_machine_init_done_notifier(&s->machine_done); ++ + qemu_register_reset(virtio_iommu_system_reset, s); + } + +@@ -1198,6 +1224,7 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) + VirtIOIOMMU *s = VIRTIO_IOMMU(dev); + + qemu_unregister_reset(virtio_iommu_system_reset, s); ++ qemu_remove_machine_init_done_notifier(&s->machine_done); + + g_hash_table_destroy(s->as_by_busptr); + if (s->domains) { +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 
2ad5ee320b..a93fc5383e 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -61,6 +61,8 @@ struct VirtIOIOMMU { + QemuRecMutex mutex; + GTree *endpoints; + bool boot_bypass; ++ Notifier machine_done; ++ bool granule_frozen; + }; + + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch b/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch new file mode 100644 index 0000000..7934a12 --- /dev/null +++ b/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch @@ -0,0 +1,83 @@ +From 643d93343759a350fe0f6327d308bf6a93c79d25 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jul 2023 18:51:18 +0200 +Subject: [PATCH 02/37] virtio-iommu: Rework the traces in + virtio_iommu_set_page_size_mask() + +RH-Author: Eric Auger +RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes +RH-Bugzilla: 2211609 2211634 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Commit: [2/2] 0af7078dde158f07c83e2b293adc5d9d475688ae (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211609 + +The current error messages in virtio_iommu_set_page_size_mask() +sound quite similar for different situations and miss the IOMMU +memory region that causes the issue. + +Clarify them and rework the comment. + +Also remove the trace when the new page_size_mask is not applied as +the current frozen granule is kept. This message is rather confusing +for the end user and anyway the current granule would have been used +by the driver. + +Signed-off-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Message-Id: <20230705165118.28194-3-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Jean-Philippe Brucker +Tested-by: Jean-Philippe Brucker +(cherry picked from commit 587a7641d53055054d68d67d94c9408ef808f127) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 19 +++++++------------ + 1 file changed, 7 insertions(+), 12 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 542679b321..421e2a944f 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -1101,29 +1101,24 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + new_mask); + + if ((cur_mask & new_mask) == 0) { +- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 +- " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask); ++ error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 ++ " incompatible with currently supported mask 0x%"PRIx64, ++ mr->parent_obj.name, new_mask, cur_mask); + return -1; + } + + /* + * Once the granule is frozen we can't change the mask anymore. If by + * chance the hotplugged device supports the same granule, we can still +- * accept it. Having a different masks is possible but the guest will use +- * sub-optimal block sizes, so warn about it. ++ * accept it. 
+ */ + if (s->granule_frozen) { +- int new_granule = ctz64(new_mask); + int cur_granule = ctz64(cur_mask); + +- if (new_granule != cur_granule) { +- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 +- " is incompatible with mask 0x%"PRIx64, cur_mask, +- new_mask); ++ if (!(BIT(cur_granule) & new_mask)) { ++ error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", ++ mr->parent_obj.name, BIT_ULL(cur_granule)); + return -1; +- } else if (new_mask != cur_mask) { +- warn_report("virtio-iommu page mask 0x%"PRIx64 +- " does not match 0x%"PRIx64, cur_mask, new_mask); + } + return 0; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch b/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch new file mode 100644 index 0000000..638ae98 --- /dev/null +++ b/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch @@ -0,0 +1,88 @@ +From 59cd85621b1b14ada843ea0562cc76b6a7c93df4 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 18 Jul 2023 20:21:36 +0200 +Subject: [PATCH 08/14] virtio-iommu: Standardize granule extraction and + formatting + +RH-Author: Eric Auger +RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes +RH-Bugzilla: 2229133 +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu +RH-Commit: [2/3] 48784ef2a19174518f66479dcb532230bffe8bf1 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 + +At several locations we compute the granule from the config +page_size_mask using ctz() and then format it in traces using +BIT(). As the page_size_mask is 64b we should use ctz64 and +BIT_ULL() for formatting. We failed to be consistent. + +Note the page_size_mask is garanteed to be non null. The spec +mandates the device to set at least one bit, so ctz64 cannot +return 64. 
This is garanteed by the fact the device +initializes the page_size_mask to qemu_target_page_mask() +and then the page_size_mask is further constrained by +virtio_iommu_set_page_size_mask() callback which can't +result in a new mask being null. So if Coverity complains +round those ctz64/BIT_ULL with CID 1517772 this is a false +positive + +Signed-off-by: Eric Auger +Fixes: 94df5b2180 ("virtio-iommu: Fix 64kB host page size VFIO device assignment") +Message-Id: <20230718182136.40096-1-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Jean-Philippe Brucker +(cherry picked from commit 1084feddc6a677cdfdde56936bfb97cf32cc4dee) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 17ce630200..17b3dcd158 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -854,17 +854,19 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + VirtIOIOMMUEndpoint *ep; + uint32_t sid, flags; + bool bypass_allowed; ++ int granule; + bool found; + int i; + + interval.low = addr; + interval.high = addr + 1; ++ granule = ctz64(s->config.page_size_mask); + + IOMMUTLBEntry entry = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = addr, +- .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1, ++ .addr_mask = BIT_ULL(granule) - 1, + .perm = IOMMU_NONE, + }; + +@@ -1117,7 +1119,7 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + if (s->granule_frozen) { + int cur_granule = ctz64(cur_mask); + +- if (!(BIT(cur_granule) & new_mask)) { ++ if (!(BIT_ULL(cur_granule) & new_mask)) { + error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", + mr->parent_obj.name, BIT_ULL(cur_granule)); + return -1; +@@ -1163,7 +1165,7 @@ static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) 
+ } + s->granule_frozen = true; + granule = ctz64(s->config.page_size_mask); +- trace_virtio_iommu_freeze_granule(BIT(granule)); ++ trace_virtio_iommu_freeze_granule(BIT_ULL(granule)); + } + + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch deleted file mode 100644 index 275b197..0000000 --- a/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 181705090c9963c2da97811838ace5bb058737c6 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:50 +0800 -Subject: [PATCH 09/31] virtio-mmio: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/10] 742cc2b425ffd7bbd393772526e7481446ee131c (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add configure interrupt support in virtio-mmio bus. -add function to set configure guest notifier. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-10-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit cd336e834620ea78edef049c3567f312974e475b) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-mmio.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c -index d240efef97..103260ec15 100644 ---- a/hw/virtio/virtio-mmio.c -+++ b/hw/virtio/virtio-mmio.c -@@ -670,7 +670,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign, - - return 0; - } -+static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign, -+ bool with_irqfd) -+{ -+ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); -+ EventNotifier *notifier = virtio_config_get_guest_notifier(vdev); -+ int r = 0; - -+ if (assign) { -+ r = event_notifier_init(notifier, 0); -+ if (r < 0) { -+ return r; -+ } -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ } else { -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ event_notifier_cleanup(notifier); -+ } -+ if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) { -+ vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign); -+ } -+ return r; -+} - static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, - bool assign) - { -@@ -692,6 +715,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, - goto assign_error; - } - } -+ r = virtio_mmio_set_config_guest_notifier(d, assign, with_irqfd); -+ if (r < 0) { -+ goto assign_error; -+ } - - return 0; - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch deleted file mode 100644 index 74b956a..0000000 --- a/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 2b8e3409edb8a17d89c3829cfa3d92bdfdd43c53 Mon Sep 17 
00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:49 +0800 -Subject: [PATCH 08/31] virtio-net: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/10] 1b125169bea6c81c508b154fa1bae68af153b312 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add functions to support configure interrupt in virtio_net -Add the functions to support vhost_net_config_pending -and vhost_net_config_mask. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-9-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 8aab0d1dbe90c7b5ac6672a1a09b0578178f5f4c) -Signed-off-by: Cindy Lu ---- - hw/net/vhost_net-stub.c | 9 +++++++++ - hw/net/vhost_net.c | 9 +++++++++ - hw/net/virtio-net.c | 4 ++-- - include/net/vhost_net.h | 2 ++ - 4 files changed, 22 insertions(+), 2 deletions(-) - -diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c -index 9f7daae99c..c36f258201 100644 ---- a/hw/net/vhost_net-stub.c -+++ b/hw/net/vhost_net-stub.c -@@ -82,6 +82,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - { - } - -+bool vhost_net_config_pending(VHostNetState *net) -+{ -+ return false; -+} -+ -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) -+{ -+} -+ - int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) - { - return -1; -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 043058ff43..6a55f5a473 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -478,6 +478,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - vhost_virtqueue_mask(&net->dev, dev, idx, mask); - } - 
-+bool vhost_net_config_pending(VHostNetState *net) -+{ -+ return vhost_config_pending(&net->dev); -+} -+ -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) -+{ -+ vhost_config_mask(&net->dev, dev, mask); -+} - VHostNetState *get_vhost_net(NetClientState *nc) - { - VHostNetState *vhost_net = 0; -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index bee35d6f9f..ec974f7a76 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3323,7 +3323,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) - */ - - if (idx == VIRTIO_CONFIG_IRQ_IDX) { -- return false; -+ return vhost_net_config_pending(get_vhost_net(nc->peer)); - } - return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); - } -@@ -3355,9 +3355,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, - */ - - if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); - return; - } -- - vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); - } - -diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h -index 40b9a40074..dbbd0dc04e 100644 ---- a/include/net/vhost_net.h -+++ b/include/net/vhost_net.h -@@ -39,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, - bool vhost_net_virtqueue_pending(VHostNetState *net, int n); - void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - int idx, bool mask); -+bool vhost_net_config_pending(VHostNetState *net); -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask); - int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr); - VHostNetState *get_vhost_net(NetClientState *nc); - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch b/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch new file mode 100644 index 0000000..119ea84 --- /dev/null +++ 
b/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch @@ -0,0 +1,92 @@ +From 4fe096a6fad61ab721fd29324d48383c7f427ac9 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 5 Jun 2023 16:21:25 +0200 +Subject: [PATCH 7/9] virtio-net: correctly report maximum tx_queue_size value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 191: virtio-net: correctly report maximum tx_queue_size value +RH-Bugzilla: 2040509 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Eugenio Pérez +RH-Commit: [1/1] afb944c6d75fe476ac86fe267b1cca5f272dfbbd (lvivier/qemu-kvm-centos) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040509 + +Maximum value for tx_queue_size depends on the backend type. +1024 for vDPA/vhost-user, 256 for all the others. + +The value is returned by virtio_net_max_tx_queue_size() to set the +parameter: + + n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), + n->net_conf.tx_queue_size); + +But the parameter checking uses VIRTQUEUE_MAX_SIZE (1024). + +So the parameter is silently ignored and ethtool reports a different +value than the one provided by the user. + + ... -netdev tap,... -device virtio-net,tx_queue_size=1024 + + # ethtool -g enp0s2 + Ring parameters for enp0s2: + Pre-set maximums: + RX: 256 + RX Mini: n/a + RX Jumbo: n/a + TX: 256 + Current hardware settings: + RX: 256 + RX Mini: n/a + RX Jumbo: n/a + TX: 256 + + ... -netdev vhost-user,... -device virtio-net,tx_queue_size=2048 + + Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 + +With this patch the correct maximum value is checked and displayed. 
+ +For vDPA/vhost-user: + + Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 + +For all the others: + + Invalid tx_queue_size (= 512), must be a power of 2 between 256 and 256 + +Fixes: 2eef278b9e63 ("virtio-net: fix tx queue size for !vhost-user") +Cc: mst@redhat.com +Cc: qemu-stable@nongnu.org +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit 4271f4038372f174dbafffacca1a748d058a03ba) +--- + hw/net/virtio-net.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 447f669921..ae1e6a5e3d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3628,12 +3628,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + } + + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || +- n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || ++ n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || + !is_power_of_2(n->net_conf.tx_queue_size)) { + error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " + "must be a power of 2 between %d and %d", + n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, +- VIRTQUEUE_MAX_SIZE); ++ virtio_net_max_tx_queue_size(n)); + virtio_cleanup(vdev); + return; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch deleted file mode 100644 index 14070a4..0000000 --- a/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,274 +0,0 @@ -From 61ac1476d3820c97e1cc103af422b17bc94c6ca5 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:51 +0800 -Subject: [PATCH 10/31] virtio-pci: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 
1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/10] ebd6a11d7699660d8ac5a4e44a790f823daea57c (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add process to handle the configure interrupt, The function's -logic is the same with vq interrupt.Add extra process to check -the configure interrupt - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-11-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1680542862edd963e6380dd4121a5e85df55581f) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 118 +++++++++++++++++++++++++++------ - include/hw/virtio/virtio-pci.h | 4 +- - 2 files changed, 102 insertions(+), 20 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index ec816ea367..3f00e91718 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -751,7 +751,8 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, - VirtQueue *vq; - - if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { -- return -1; -+ *n = virtio_config_get_guest_notifier(vdev); -+ *vector = vdev->config_vector; - } else { - if (!virtio_queue_get_num(vdev, queue_no)) { - return -1; -@@ -811,7 +812,7 @@ undo: - } - return ret; - } --static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) - { - int queue_no; - int ret = 0; -@@ -826,6 +827,10 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - return ret; - } - -+static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy) -+{ -+ return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX); -+} - - static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, - int queue_no) -@@ -850,7 +855,7 @@ static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, - 
kvm_virtio_pci_vq_vector_release(proxy, vector); - } - --static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) - { - int queue_no; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -@@ -863,6 +868,11 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - } - } - -+static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) -+{ -+ kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX); -+} -+ - static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, - unsigned int queue_no, - unsigned int vector, -@@ -944,9 +954,19 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - } - vq = virtio_vector_next_queue(vq); - } -- -+ /* unmask config intr */ -+ if (vector == vdev->config_vector) { -+ n = virtio_config_get_guest_notifier(vdev); -+ ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, -+ msg, n); -+ if (ret < 0) { -+ goto undo_config; -+ } -+ } - return 0; -- -+undo_config: -+ n = virtio_config_get_guest_notifier(vdev); -+ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); - undo: - vq = virtio_vector_first_queue(vdev, vector); - while (vq && unmasked >= 0) { -@@ -980,6 +1000,11 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) - } - vq = virtio_vector_next_queue(vq); - } -+ -+ if (vector == vdev->config_vector) { -+ n = virtio_config_get_guest_notifier(vdev); -+ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); -+ } - } - - static void virtio_pci_vector_poll(PCIDevice *dev, -@@ -1011,6 +1036,34 @@ static void virtio_pci_vector_poll(PCIDevice *dev, - msix_set_pending(dev, vector); - } - } -+ /* poll the config intr */ -+ ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, ¬ifier, -+ &vector); -+ if (ret < 0) { -+ return; -+ } -+ if (vector < vector_start || vector >= vector_end || -+ !msix_is_masked(dev, 
vector)) { -+ return; -+ } -+ if (k->guest_notifier_pending) { -+ if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) { -+ msix_set_pending(dev, vector); -+ } -+ } else if (event_notifier_test_and_clear(notifier)) { -+ msix_set_pending(dev, vector); -+ } -+} -+ -+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, -+ int n, bool assign, -+ bool with_irqfd) -+{ -+ if (n == VIRTIO_CONFIG_IRQ_IDX) { -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ } else { -+ virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd); -+ } - } - - static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, -@@ -1019,17 +1072,25 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); -- VirtQueue *vq = virtio_get_queue(vdev, n); -- EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); -+ VirtQueue *vq = NULL; -+ EventNotifier *notifier = NULL; -+ -+ if (n == VIRTIO_CONFIG_IRQ_IDX) { -+ notifier = virtio_config_get_guest_notifier(vdev); -+ } else { -+ vq = virtio_get_queue(vdev, n); -+ notifier = virtio_queue_get_guest_notifier(vq); -+ } - - if (assign) { - int r = event_notifier_init(notifier, 0); - if (r < 0) { - return r; - } -- virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd); -+ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd); - } else { -- virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd); -+ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false, -+ with_irqfd); - event_notifier_cleanup(notifier); - } - -@@ -1072,10 +1133,13 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - proxy->nvqs_with_notifiers = nvqs; - - /* Must unset vector notifier while guest notifier is still assigned */ -- if 
((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) { -+ if ((proxy->vector_irqfd || -+ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && -+ !assign) { - msix_unset_vector_notifiers(&proxy->pci_dev); - if (proxy->vector_irqfd) { -- kvm_virtio_pci_vector_release(proxy, nvqs); -+ kvm_virtio_pci_vector_vq_release(proxy, nvqs); -+ kvm_virtio_pci_vector_config_release(proxy); - g_free(proxy->vector_irqfd); - proxy->vector_irqfd = NULL; - } -@@ -1091,20 +1155,30 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - goto assign_error; - } - } -- -+ r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign, -+ with_irqfd); -+ if (r < 0) { -+ goto config_assign_error; -+ } - /* Must set vector notifier after guest notifier has been assigned */ -- if ((with_irqfd || k->guest_notifier_mask) && assign) { -+ if ((with_irqfd || -+ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && -+ assign) { - if (with_irqfd) { - proxy->vector_irqfd = - g_malloc0(sizeof(*proxy->vector_irqfd) * - msix_nr_vectors_allocated(&proxy->pci_dev)); -- r = kvm_virtio_pci_vector_use(proxy, nvqs); -+ r = kvm_virtio_pci_vector_vq_use(proxy, nvqs); -+ if (r < 0) { -+ goto config_assign_error; -+ } -+ r = kvm_virtio_pci_vector_config_use(proxy); - if (r < 0) { -- goto assign_error; -+ goto config_error; - } - } -- r = msix_set_vector_notifiers(&proxy->pci_dev, -- virtio_pci_vector_unmask, -+ -+ r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, - virtio_pci_vector_mask, - virtio_pci_vector_poll); - if (r < 0) { -@@ -1117,9 +1191,15 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - notifiers_error: - if (with_irqfd) { - assert(assign); -- kvm_virtio_pci_vector_release(proxy, nvqs); -+ kvm_virtio_pci_vector_vq_release(proxy, nvqs); - } -- -+config_error: -+ if (with_irqfd) { -+ kvm_virtio_pci_vector_config_release(proxy); -+ } -+config_assign_error: -+ 
virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign, -+ with_irqfd); - assign_error: - /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ - assert(assign); -diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h -index 938799e8f6..c02e278f46 100644 ---- a/include/hw/virtio/virtio-pci.h -+++ b/include/hw/virtio/virtio-pci.h -@@ -256,5 +256,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); - * @fixed_queues. - */ - unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); -- -+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, -+ int n, bool assign, -+ bool with_irqfd); - #endif --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch b/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch deleted file mode 100644 index a8c32a2..0000000 --- a/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch +++ /dev/null @@ -1,272 +0,0 @@ -From 9a234f849273d3480e4a88042cb1ea06a37a626b Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:43 +0800 -Subject: [PATCH 02/31] virtio-pci: decouple notifier from interrupt process -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/10] a20f4c9ff38b239531d12cbcc7deaa649c86abc3 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -To reuse the notifier process. We add the virtio_pci_get_notifier -to get the notifier and vector. The INPUT for this function is IDX, -The OUTPUT is the notifier and the vector - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-3-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. 
Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 2e07f69d0c828e21515b63dc22884d548540b382) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 88 +++++++++++++++++++++++++++--------------- - 1 file changed, 57 insertions(+), 31 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index a1c9dfa7bb..52c7692fff 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -728,29 +728,41 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, - } - - static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, -- unsigned int queue_no, -+ EventNotifier *n, - unsigned int vector) - { - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; -- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); - } - - static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, -- unsigned int queue_no, -+ EventNotifier *n , - unsigned int vector) - { -- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; - int ret; - - ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); - assert(ret == 0); - } -+static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, -+ EventNotifier **n, unsigned int *vector) -+{ -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ VirtQueue *vq; -+ -+ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { -+ return -1; -+ } else { -+ if (!virtio_queue_get_num(vdev, queue_no)) { -+ return -1; -+ } -+ *vector = virtio_queue_vector(vdev, queue_no); -+ vq = virtio_get_queue(vdev, queue_no); -+ *n = virtio_queue_get_guest_notifier(vq); -+ } -+ return 0; -+} - - static int 
kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - { -@@ -759,12 +771,15 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - unsigned int vector; - int ret, queue_no; -- -+ EventNotifier *n; - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } - if (vector >= msix_nr_vectors_allocated(dev)) { - continue; - } -@@ -776,7 +791,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - * Otherwise, delay until unmasked in the frontend. - */ - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - if (ret < 0) { - kvm_virtio_pci_vq_vector_release(proxy, vector); - goto undo; -@@ -792,7 +807,11 @@ undo: - continue; - } - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - kvm_virtio_pci_vq_vector_release(proxy, vector); - } -@@ -806,12 +825,16 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - unsigned int vector; - int queue_no; - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- -+ EventNotifier *n; -+ int ret ; - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } - if (vector >= msix_nr_vectors_allocated(dev)) { - continue; - } -@@ -819,21 +842,20 @@ static void 
kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - * Otherwise, it was cleaned when masked in the frontend. - */ - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - kvm_virtio_pci_vq_vector_release(proxy, vector); - } - } - --static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, -+static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, - unsigned int queue_no, - unsigned int vector, -- MSIMessage msg) -+ MSIMessage msg, -+ EventNotifier *n) - { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - VirtIOIRQFD *irqfd; - int ret = 0; - -@@ -860,14 +882,15 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, - event_notifier_set(n); - } - } else { -- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - } - return ret; - } - --static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, -+static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy, - unsigned int queue_no, -- unsigned int vector) -+ unsigned int vector, -+ EventNotifier *n) - { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -@@ -878,7 +901,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { - k->guest_notifier_mask(vdev, queue_no, true); - } else { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - } - -@@ -888,6 +911,7 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); - VirtIODevice *vdev = 
virtio_bus_get_device(&proxy->bus); - VirtQueue *vq = virtio_vector_first_queue(vdev, vector); -+ EventNotifier *n; - int ret, index, unmasked = 0; - - while (vq) { -@@ -896,7 +920,8 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - break; - } - if (index < proxy->nvqs_with_notifiers) { -- ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); -+ n = virtio_queue_get_guest_notifier(vq); -+ ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n); - if (ret < 0) { - goto undo; - } -@@ -912,7 +937,8 @@ undo: - while (vq && unmasked >= 0) { - index = virtio_get_queue_index(vq); - if (index < proxy->nvqs_with_notifiers) { -- virtio_pci_vq_vector_mask(proxy, index, vector); -+ n = virtio_queue_get_guest_notifier(vq); -+ virtio_pci_one_vector_mask(proxy, index, vector, n); - --unmasked; - } - vq = virtio_vector_next_queue(vq); -@@ -925,15 +951,17 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) - VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtQueue *vq = virtio_vector_first_queue(vdev, vector); -+ EventNotifier *n; - int index; - - while (vq) { - index = virtio_get_queue_index(vq); -+ n = virtio_queue_get_guest_notifier(vq); - if (!virtio_queue_get_num(vdev, index)) { - break; - } - if (index < proxy->nvqs_with_notifiers) { -- virtio_pci_vq_vector_mask(proxy, index, vector); -+ virtio_pci_one_vector_mask(proxy, index, vector, n); - } - vq = virtio_vector_next_queue(vq); - } -@@ -949,19 +977,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev, - int queue_no; - unsigned int vector; - EventNotifier *notifier; -- VirtQueue *vq; -+ int ret; - - for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { -- if (!virtio_queue_get_num(vdev, queue_no)) { -+ ret = virtio_pci_get_notifier(proxy, queue_no, ¬ifier, &vector); -+ if (ret < 0) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); - if (vector < 
vector_start || vector >= vector_end || - !msix_is_masked(dev, vector)) { - continue; - } -- vq = virtio_get_queue(vdev, queue_no); -- notifier = virtio_queue_get_guest_notifier(vq); - if (k->guest_notifier_pending) { - if (k->guest_notifier_pending(vdev, queue_no)) { - msix_set_pending(dev, vector); --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch b/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch deleted file mode 100644 index be9b3c7..0000000 --- a/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch +++ /dev/null @@ -1,212 +0,0 @@ -From 58cd577ff157cfaf7506bba135db58e75c330ff0 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:44 +0800 -Subject: [PATCH 03/31] virtio-pci: decouple the single vector from the - interrupt process -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/10] 2c79cb678f005fb2f53b2db0f237347634ab3422 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 - -To reuse the interrupt process in configure interrupt -Need to decouple the single vector from the interrupt process. -We add new function kvm_virtio_pci_vector_use_one and _release_one. -These functions are used for the single vector, the whole process will -finish in the loop with vq number. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-4-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit ee3b8dc6cc496ba7f4e27aed4493275c706a7942) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 131 +++++++++++++++++++++++------------------ - 1 file changed, 73 insertions(+), 58 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index 52c7692fff..ec816ea367 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -699,7 +699,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev, - } - - static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, -- unsigned int queue_no, - unsigned int vector) - { - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; -@@ -764,87 +763,103 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, - return 0; - } - --static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) - { -+ unsigned int vector; -+ int ret; -+ EventNotifier *n; - PCIDevice *dev = &proxy->pci_dev; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- unsigned int vector; -- int ret, queue_no; -- EventNotifier *n; -- for (queue_no = 0; queue_no < nvqs; queue_no++) { -- if (!virtio_queue_get_num(vdev, queue_no)) { -- break; -- } -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -- } -- ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector); -+ -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return ret; -+ } -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return 0; -+ } -+ ret = kvm_virtio_pci_vq_vector_use(proxy, vector); -+ if (ret < 0) { -+ goto undo; -+ } -+ /* -+ * If guest supports masking, set up irqfd now. -+ * Otherwise, delay until unmasked in the frontend. 
-+ */ -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - if (ret < 0) { -+ kvm_virtio_pci_vq_vector_release(proxy, vector); - goto undo; - } -- /* If guest supports masking, set up irqfd now. -- * Otherwise, delay until unmasked in the frontend. -- */ -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); -- if (ret < 0) { -- kvm_virtio_pci_vq_vector_release(proxy, vector); -- goto undo; -- } -- } - } -- return 0; - -+ return 0; - undo: -- while (--queue_no >= 0) { -- vector = virtio_queue_vector(vdev, queue_no); -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -+ -+ vector = virtio_queue_vector(vdev, queue_no); -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return ret; -+ } -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return ret; - } -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ } -+ return ret; -+} -+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+{ -+ int queue_no; -+ int ret = 0; -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ -+ for (queue_no = 0; queue_no < nvqs; queue_no++) { -+ if (!virtio_queue_get_num(vdev, queue_no)) { -+ return -1; - } -- kvm_virtio_pci_vq_vector_release(proxy, vector); -+ ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); - } - return ret; - } - --static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+ -+static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, -+ int queue_no) - { -- PCIDevice *dev = &proxy->pci_dev; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - unsigned 
int vector; -- int queue_no; -- VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - EventNotifier *n; -- int ret ; -+ int ret; -+ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -+ PCIDevice *dev = &proxy->pci_dev; -+ -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return; -+ } -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return; -+ } -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ } -+ kvm_virtio_pci_vq_vector_release(proxy, vector); -+} -+ -+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+{ -+ int queue_no; -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -- } -- /* If guest supports masking, clean up irqfd now. -- * Otherwise, it was cleaned when masked in the frontend. -- */ -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, n, vector); -- } -- kvm_virtio_pci_vq_vector_release(proxy, vector); -+ kvm_virtio_pci_vector_release_one(proxy, queue_no); - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch b/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch deleted file mode 100644 index 0555a68..0000000 --- a/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 35ffe28a91a2ef08dd181d1a22695050ccbb6995 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 9 Jan 2023 16:04:43 +0000 -Subject: [PATCH 1/2] virtio-rng-pci: fix migration compat for vectors - -RH-Author: Dr. 
David Alan Gilbert -RH-MergeRequest: 131: virtio-rng-pci: fix migration compat for vectors -RH-Bugzilla: 2155749 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth -RH-Commit: [1/1] 1a866491dd191b073d71ae1aa5f4d76ee885de6d (dagrh/c-9-s-qemu-kvm) - -Fixup the migration compatibility for existing machine types -so that they do not enable msi-x. - -Symptom: - -(qemu) qemu: get_pci_config_device: Bad config data: i=0x34 read: 84 device: 98 cmask: ff wmask: 0 w1cmask:0 -qemu: Failed to load PCIDevice:config -qemu: Failed to load virtio-rng:virtio -qemu: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-rng' -qemu: load of migration failed: Invalid argument - -Note: This fix will break migration from 7.2->7.2-fixed with this patch - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=2155749 -Fixes: 9ea02e8f1 ("virtio-rng-pci: Allow setting nvectors, so we can use MSI-X") - -This downstream fix is the equivalent of an upstream fix I've posted to -the 7.2 machine type compatibility. - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 3d851d34da..7adbac6f87 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -56,6 +56,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "nvme-ns", "eui64-default", "on"}, - /* hw_compat_rhel_9_1 from hw_compat_7_1 */ - { "virtio-device", "queue_reset", "false" }, -+ /* hw_compat_rhel_9_1 bz 2155749 */ -+ { "virtio-rng-pci", "vectors", "0" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch b/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch deleted file mode 100644 index e5288d6..0000000 --- a/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 5413b8825db6eecc6f245854a6bce58e4dee3294 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 7 Feb 2023 17:57:39 +0000 -Subject: [PATCH 20/20] virtio-rng-pci: fix transitional migration compat for - vectors - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 147: virtio-rng-pci: fix transitional migration compat for vectors -RH-Bugzilla: 2162569 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Gerd Hoffmann -RH-Commit: [1/1] 6e2bd111cd56808fccf2c0464a40f7784fd893a2 (dagrh/c-9-s-qemu-kvm) - -In upstream bad9c5a5166/downstream 46e08bafe9ed I fixed the virito-rng-pci -migration compatibility, but it was discovered that we also need to fix -the other aliases of the device for the transitional cases. - -I've sent upstream: -https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg01926.html -but downstream we need to change the downstream machine type anyway, -so it's not quite identical. - -Fixes: 9ea02e8f1 ('virtio-rng-pci: Allow setting nvectors, so we can use MSI-X') - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/core/machine.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 7adbac6f87..3ee638394b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -58,6 +58,9 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "virtio-device", "queue_reset", "false" }, - /* hw_compat_rhel_9_1 bz 2155749 */ - { "virtio-rng-pci", "vectors", "0" }, -+ /* hw_compat_rhel_9_1 bz 2162569 */ -+ { "virtio-rng-pci-transitional", "vectors", "0" }, -+ { "virtio-rng-pci-non-transitional", "vectors", "0" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch b/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch deleted file mode 100644 index c951897..0000000 --- a/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch +++ /dev/null @@ -1,325 +0,0 @@ -From c64027b1ff9856031c01009f4b5c3560d92cc998 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:18 -0500 -Subject: [PATCH 03/12] virtio-scsi: reset SCSI devices from main loop thread - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [3/3] 2a29cb9600709a799daadb4addb58a747ed2e3a3 (stefanha/centos-stream-qemu-kvm) - -When an IOThread is configured, the ctrl virtqueue is processed in the -IOThread. TMFs that reset SCSI devices are currently called directly -from the IOThread and trigger an assertion failure in blk_drain() from -the following call stack: - -virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset --> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain - - ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed. 
- -The blk_drain() function is not designed to be called from an IOThread -because it needs the Big QEMU Lock (BQL). - -This patch defers TMFs that reset SCSI devices to a Bottom Half (BH) -that runs in the main loop thread under the BQL. This way it's safe to -call blk_drain() and the assertion failure is avoided. - -Introduce s->tmf_bh_list for tracking TMF requests that have been -deferred to the BH. When the BH runs it will grab the entire list and -process all requests. Care must be taken to clear the list when the -virtio-scsi device is reset or unrealized. Otherwise deferred TMF -requests could execute later and lead to use-after-free or other -undefined behavior. - -The s->resetting counter that's used by TMFs that reset SCSI devices is -accessed from multiple threads. This patch makes that explicit by using -atomic accessor functions. With this patch applied the counter is only -modified by the main loop thread under the BQL but can be read by any -thread. - -Reported-by: Qing Wang -Cc: Paolo Bonzini -Reviewed-by: Eric Blake -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-4-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 169 +++++++++++++++++++++++++------- - include/hw/virtio/virtio-scsi.h | 11 ++- - 2 files changed, 143 insertions(+), 37 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 6f6e2e32ba..7d27e4c2a1 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -42,13 +42,11 @@ typedef struct VirtIOSCSIReq { - QEMUSGList qsgl; - QEMUIOVector resp_iov; - -- union { -- /* Used for two-stage request submission */ -- QTAILQ_ENTRY(VirtIOSCSIReq) next; -+ /* Used for two-stage request submission and TMFs deferred to BH */ -+ QTAILQ_ENTRY(VirtIOSCSIReq) next; - -- /* Used for cancellation of request during TMFs */ -- int remaining; -- }; -+ /* Used for 
cancellation of request during TMFs */ -+ int remaining; - - SCSIRequest *sreq; - size_t resp_size; -@@ -293,6 +291,122 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) - } - } - -+static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) -+{ -+ VirtIOSCSI *s = req->dev; -+ SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); -+ BusChild *kid; -+ int target; -+ -+ switch (req->req.tmf.subtype) { -+ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: -+ if (!d) { -+ req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; -+ goto out; -+ } -+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { -+ req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; -+ goto out; -+ } -+ qatomic_inc(&s->resetting); -+ device_cold_reset(&d->qdev); -+ qatomic_dec(&s->resetting); -+ break; -+ -+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -+ target = req->req.tmf.lun[1]; -+ qatomic_inc(&s->resetting); -+ -+ rcu_read_lock(); -+ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { -+ SCSIDevice *d1 = SCSI_DEVICE(kid->child); -+ if (d1->channel == 0 && d1->id == target) { -+ device_cold_reset(&d1->qdev); -+ } -+ } -+ rcu_read_unlock(); -+ -+ qatomic_dec(&s->resetting); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ break; -+ } -+ -+out: -+ object_unref(OBJECT(d)); -+ -+ virtio_scsi_acquire(s); -+ virtio_scsi_complete_req(req); -+ virtio_scsi_release(s); -+} -+ -+/* Some TMFs must be processed from the main loop thread */ -+static void virtio_scsi_do_tmf_bh(void *opaque) -+{ -+ VirtIOSCSI *s = opaque; -+ QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); -+ VirtIOSCSIReq *req; -+ VirtIOSCSIReq *tmp; -+ -+ GLOBAL_STATE_CODE(); -+ -+ virtio_scsi_acquire(s); -+ -+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -+ QTAILQ_INSERT_TAIL(&reqs, req, next); -+ } -+ -+ qemu_bh_delete(s->tmf_bh); -+ s->tmf_bh = NULL; -+ -+ virtio_scsi_release(s); -+ -+ QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { -+ 
QTAILQ_REMOVE(&reqs, req, next); -+ virtio_scsi_do_one_tmf_bh(req); -+ } -+} -+ -+static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) -+{ -+ VirtIOSCSIReq *req; -+ VirtIOSCSIReq *tmp; -+ -+ GLOBAL_STATE_CODE(); -+ -+ virtio_scsi_acquire(s); -+ -+ if (s->tmf_bh) { -+ qemu_bh_delete(s->tmf_bh); -+ s->tmf_bh = NULL; -+ } -+ -+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -+ -+ /* SAM-6 6.3.2 Hard reset */ -+ req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; -+ virtio_scsi_complete_req(req); -+ } -+ -+ virtio_scsi_release(s); -+} -+ -+static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) -+{ -+ VirtIOSCSI *s = req->dev; -+ -+ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); -+ -+ if (!s->tmf_bh) { -+ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); -+ qemu_bh_schedule(s->tmf_bh); -+ } -+} -+ - /* Return 0 if the request is ready to be completed and return to guest; - * -EINPROGRESS if the request is submitted and will be completed later, in the - * case of async cancellation. 
*/ -@@ -300,8 +414,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - { - SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); - SCSIRequest *r, *next; -- BusChild *kid; -- int target; - int ret = 0; - - virtio_scsi_ctx_check(s, d); -@@ -358,15 +470,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - break; - - case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: -- if (!d) { -- goto fail; -- } -- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { -- goto incorrect_lun; -- } -- s->resetting++; -- device_cold_reset(&d->qdev); -- s->resetting--; -+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -+ virtio_scsi_defer_tmf_to_bh(req); -+ ret = -EINPROGRESS; - break; - - case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: -@@ -409,22 +515,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - } - break; - -- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -- target = req->req.tmf.lun[1]; -- s->resetting++; -- -- rcu_read_lock(); -- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { -- SCSIDevice *d1 = SCSI_DEVICE(kid->child); -- if (d1->channel == 0 && d1->id == target) { -- device_cold_reset(&d1->qdev); -- } -- } -- rcu_read_unlock(); -- -- s->resetting--; -- break; -- - case VIRTIO_SCSI_T_TMF_CLEAR_ACA: - default: - req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; -@@ -654,7 +744,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r) - if (!req) { - return; - } -- if (req->dev->resetting) { -+ if (qatomic_read(&req->dev->resetting)) { - req->resp.cmd.response = VIRTIO_SCSI_S_RESET; - } else { - req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; -@@ -830,9 +920,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev) - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); - - assert(!s->dataplane_started); -- s->resetting++; -+ -+ virtio_scsi_reset_tmf_bh(s); -+ -+ qatomic_inc(&s->resetting); - bus_cold_reset(BUS(&s->bus)); -- s->resetting--; -+ qatomic_dec(&s->resetting); - - vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; 
- vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; -@@ -1052,6 +1145,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) - VirtIOSCSI *s = VIRTIO_SCSI(dev); - Error *err = NULL; - -+ QTAILQ_INIT(&s->tmf_bh_list); -+ - virtio_scsi_common_realize(dev, - virtio_scsi_handle_ctrl, - virtio_scsi_handle_event, -@@ -1089,6 +1184,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) - { - VirtIOSCSI *s = VIRTIO_SCSI(dev); - -+ virtio_scsi_reset_tmf_bh(s); -+ - qbus_set_hotplug_handler(BUS(&s->bus), NULL); - virtio_scsi_common_unrealize(dev); - } -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index a36aad9c86..1c1cd77d6e 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -75,13 +75,22 @@ struct VirtIOSCSICommon { - VirtQueue **cmd_vqs; - }; - -+struct VirtIOSCSIReq; -+ - struct VirtIOSCSI { - VirtIOSCSICommon parent_obj; - - SCSIBus bus; -- int resetting; -+ int resetting; /* written from main loop thread, read from any thread */ - bool events_dropped; - -+ /* -+ * TMFs deferred to main loop BH. These fields are protected by -+ * virtio_scsi_acquire(). 
-+ */ -+ QEMUBH *tmf_bh; -+ QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; -+ - /* Fields for dataplane below */ - AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ - --- -2.39.1 - diff --git a/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch b/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch deleted file mode 100644 index d797023..0000000 --- a/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 3f55d12df35552ae948587a62d6f9015664adc13 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:12 +0100 -Subject: [PATCH 1/9] virtio_net: Modify virtio_net_get_config to early return -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Jason Wang -RH-Commit: [1/4] 4f5e79afd54e157f32e6fff56ae33e2b71492525 (eperezmartin/qemu-kvm) - -Next patches introduce more code on vhost-vdpa branch, with already have -too much indentation. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Michael S. Tsirkin -Acked-by: Jason Wang -Message-Id: <20221221115015.1400889-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit ebc141a62508dc91901373c1a19fe7e2cf560dfb) ---- - hw/net/virtio-net.c | 28 +++++++++++++++------------- - 1 file changed, 15 insertions(+), 13 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index ec974f7a76..5935e55653 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -168,20 +168,22 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) - if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, - n->config_size); -- if (ret != -1) { -- /* -- * Some NIC/kernel combinations present 0 as the mac address. As -- * that is not a legal address, try to proceed with the -- * address from the QEMU command line in the hope that the -- * address has been configured correctly elsewhere - just not -- * reported by the device. -- */ -- if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { -- info_report("Zero hardware mac address detected. Ignoring."); -- memcpy(netcfg.mac, n->mac, ETH_ALEN); -- } -- memcpy(config, &netcfg, n->config_size); -+ if (ret == -1) { -+ return; - } -+ -+ /* -+ * Some NIC/kernel combinations present 0 as the mac address. As that -+ * is not a legal address, try to proceed with the address from the -+ * QEMU command line in the hope that the address has been configured -+ * correctly elsewhere - just not reported by the device. -+ */ -+ if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { -+ info_report("Zero hardware mac address detected. 
Ignoring."); -+ memcpy(netcfg.mac, n->mac, ETH_ALEN); -+ } -+ -+ memcpy(config, &netcfg, n->config_size); - } - } - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch b/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch deleted file mode 100644 index 866957c..0000000 --- a/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch +++ /dev/null @@ -1,46 +0,0 @@ -From b3d728b53abaae0c9884dfb5e9c216b1088196e3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:13 +0100 -Subject: [PATCH 2/9] virtio_net: copy VIRTIO_NET_S_ANNOUNCE if device model - has it -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Jason Wang -RH-Commit: [2/4] fb04186829eb93bab3c9ececf90fa5b035ffa2ec (eperezmartin/qemu-kvm) - -Status part of the emulated feature. It will follow device model, so we -must copy it as long as NIC device model has it set. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221221115015.1400889-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Jason Wang -(cherry picked from commit 4f93aafc8f9d731c6588f5dc5594c6a1dd1fbe66) ---- - hw/net/virtio-net.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 5935e55653..948bcf33cf 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -183,6 +183,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) - memcpy(netcfg.mac, n->mac, ETH_ALEN); - } - -+ netcfg.status |= virtio_tswap16(vdev, -+ n->status & VIRTIO_NET_S_ANNOUNCE); - memcpy(config, &netcfg, n->config_size); - } - } --- -2.31.1 - diff --git a/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch b/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch new file mode 100644 index 0000000..e1eef6d --- /dev/null +++ b/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch @@ -0,0 +1,58 @@ +From 63e2339a6f38706c6fc5eb251426812520db6a6d Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:37 -0400 +Subject: [PATCH 03/56] vl.c: Create late backends before migration object +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [2/50] 7209bb94faa48650388be8fef08c77afd26517d8 (peterx/qemu-kvm) + +The migration object may want to check against different types of memory +when initialized. Delay the creation to be after late backends. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: David Hildenbrand +Signed-off-by: Juan Quintela +(cherry picked from commit cb9d8b8ce1aaf38f53295fc59ec1b8b7eb4338d2) +Signed-off-by: Peter Xu +--- + softmmu/vl.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index ad4173138d..a44b49430b 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -3592,14 +3592,19 @@ void qemu_init(int argc, char **argv) + machine_class->name, machine_class->deprecation_reason); + } + ++ /* ++ * Create backends before creating migration objects, so that it can ++ * check against compatibilities on the backend memories (e.g. postcopy ++ * over memory-backend-file objects). ++ */ ++ qemu_create_late_backends(); ++ + /* + * Note: creates a QOM object, must run only after global and + * compat properties have been set up. + */ + migration_object_init(); + +- qemu_create_late_backends(); +- + /* parse features once if machine provides default cpu_type */ + current_machine->cpu_type = machine_class->default_cpu_type; + if (cpu_option) { +-- +2.39.1 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 0504dc7..f831cdd 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -100,7 +100,7 @@ %endif %global target_list %{kvm_target}-softmmu -%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress +%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress,virtio-blk-vhost-vdpa,virtio-blk-vfio-pci,virtio-blk-vhost-user,io_uring,nvme-io_uring %global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https %define qemudocdir %{_docdir}/%{name} %global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios" @@ -125,6 +125,7 @@ Requires: %{name}-device-usb-host = %{epoch}:%{version}-%{release} \ %if %{have_usbredir} \ Requires: 
%{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \ %endif \ +Requires: %{name}-block-blkio = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release} @@ -147,8 +148,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 7.2.0 -Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.5 +Version: 8.0.0 +Release: 16%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -184,256 +185,377 @@ Patch0011: 0011-Enable-make-check.patch Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0015: 0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0016: 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0018: 0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch -Patch0019: 0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch -Patch0020: 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch -Patch0021: 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch -Patch0022: 0022-x86-rhel-9.2.0-machine-type.patch -Patch23: kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch24: kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch25: kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch26: kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch -# For bz#2113840 - [RHEL9.2] Memory mapping 
optimization for virt machine -Patch27: kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch28: kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch29: kvm-hw-arm-virt-Add-compact-highmem-property.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch -# For bz#2155749 - [regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X -Patch32: kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch33: kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch34: kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch35: kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch36: kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch37: kvm-vhost-vdpa-add-support-for-config-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch38: kvm-virtio-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch39: kvm-vhost-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch40: kvm-virtio-net-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa 
qemu -Patch41: kvm-virtio-mmio-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch42: kvm-virtio-pci-add-support-for-configure-interrupt.patch -# For bz#2159408 - [s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8 -Patch43: kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch -# For bz#2124856 - VM with virtio interface and iommu=on will crash when try to migrate -Patch44: kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch45: kvm-block-drop-bdrv_remove_filter_or_cow_child.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch46: kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch47: kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch48: kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch49: kvm-block-Remove-drained_end_counter.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch50: kvm-block-Inline-bdrv_drain_invoke.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch51: kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch52: 
kvm-block-Drain-individual-nodes-during-reopen.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch53: kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch54: kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch55: kvm-block-Remove-subtree-drains.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch56: kvm-block-Call-drain-callbacks-only-once.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch57: kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch58: kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch59: kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch60: kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch61: kvm-accel-introduce-accelerator-blocker-API.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch62: kvm-KVM-keep-track-of-running-ioctls.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch63: 
kvm-kvm-Atomic-memslot-updates.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch64: kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch65: kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch66: kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch67: kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch -# For bz#2122523 - Secure guest can't boot with maximal number of vcpus (248) -Patch68: kvm-s390x-pv-Implement-a-CGS-check-helper.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch69: kvm-s390x-pci-coalesce-unmap-operations.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch70: kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch71: kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch -# For bz#2149191 - [RFE][guest-agent] - USB bus type support -Patch72: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch73: kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch74: kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch75: kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch76: kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch77: kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch78: kvm-vdpa-request-iova_range-only-once.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch79: kvm-vdpa-move-SVQ-vring-features-check-to-net.patch -# For 
bz#2104412 - vDPA ASID support in Qemu -Patch80: kvm-vdpa-allocate-SVQ-array-unconditionally.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch81: kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch82: kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch83: kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch84: kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch85: kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch86: kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch87: kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch88: kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch89: kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch -# For bz#2165280 - [kvm-unit-tests] debug-wp-migration fails -Patch90: kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch91: kvm-block-Improve-empty-format-specific-info-dump.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch92: kvm-block-file-Add-file-specific-image-info.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch93: kvm-block-vmdk-Change-extent-info-type.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch94: 
kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch95: kvm-qemu-img-Use-BlockNodeInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch96: kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch97: kvm-block-qapi-Introduce-BlockGraphInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch98: kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch99: kvm-iotests-Filter-child-node-information.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch100: kvm-iotests-106-214-308-Read-only-one-size-line.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch101: kvm-qemu-img-Let-info-print-block-graph.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch102: kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch -# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace -Patch103: kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch -# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace -Patch104: kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch -# For bz#2162569 - [transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2 -Patch105: kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch -# For bz#2169232 - RFE: reconnect option for stream socket back-end -Patch106: kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch -# For bz#2169232 - RFE: reconnect option for stream socket back-end -Patch107: kvm-net-stream-add-a-new-option-to-automatically-reconne.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd 
over userfaultfd(2) syscall -Patch108: kvm-linux-headers-Update-to-v6.1.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall -Patch109: kvm-util-userfaultfd-Add-uffd_open.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall -Patch110: kvm-util-userfaultfd-Support-dev-userfaultfd.patch -# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race -Patch111: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch -# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race -Patch112: kvm-migration-check-magic-value-for-deciding-the-mapping.patch -# For bz#2168172 - [s390x] qemu-kvm coredumps when SE crashes -Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch -# For bz#2168209 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch114: kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch -# For bz#2169904 - [SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022 -Patch115: kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch116: kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch117: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch118: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch119: kvm-qatomic-add-smp_mb__before-after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch120: kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch121: 
kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch122: kvm-edu-add-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch123: kvm-aio-wait-switch-to-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch124: kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch125: kvm-physmem-add-missing-memory-barrier.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch126: kvm-async-update-documentation-of-the-memory-barriers.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch127: kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch128: kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch129: kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch130: kvm-target-i386-Fix-BEXTR-instruction.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch131: kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch132: kvm-target-i386-fix-ADOX-followed-by-ADCX.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch133: kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch134: kvm-target-i386-Fix-BZHI-instruction.patch -# For bz#2156876 - [virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22) -Patch135: 
kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch -# For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z] -Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch -# For bz#2213864 - [mlx vhost_vdpa][rhel 9.2]qemu core dump when hot unplug then hotplug a vdpa interface with multi-queue setting [rhel-9.2.0.z] -Patch137: kvm-vdpa-stop-all-svq-on-device-deletion.patch -# For bz#2221219 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) [rhel-9.2.0.z] -Patch138: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch -# For bz#2211923 - [qemu-kvm] rhel guest failed boot with multi disks on error Failed to start udev Wait for Complete Device Initialization [rhel-9.2.0.z] -Patch139: kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch -# For bz#2227721 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest [rhel-9.2.0.z] -Patch140: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch -# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] -Patch141: kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch -# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] -Patch142: kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch -# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] -Patch143: kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch -# For bz#2216503 - CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.2.0.z] -Patch144: kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch +Patch0015: 
0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0016: 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +Patch0017: 0017-Add-RHEL-9.2.0-compat-structure.patch +Patch0018: 0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch +Patch0019: 0019-Disable-unwanted-new-devices.patch +# For bz#2087047 - Disk detach is unsuccessful while the guest is still booting +Patch20: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch +# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs +Patch21: kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch +# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs +Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch23: kvm-migration-Handle-block-device-inactivation-failures-.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch24: kvm-migration-Minor-control-flow-simplification.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch25: kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch26: kvm-util-mmap-alloc-qemu_fd_getfs.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch27: kvm-vl.c-Create-late-backends-before-migration-object.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch28: kvm-migration-postcopy-Detect-file-system-on-dest-host.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch29: kvm-migration-mark-mixed-functions-that-can-suspend.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch30: kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared 
memory +Patch31: kvm-migration-remove-extra-whitespace-character-for-code.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch32: kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch33: kvm-migration-Update-atomic-stats-out-of-the-mutex.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch34: kvm-migration-Make-multifd_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch35: kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch36: kvm-migration-Make-precopy_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch37: kvm-migration-Make-downtime_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch38: kvm-migration-Make-dirty_sync_count-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch39: kvm-migration-Make-postcopy_requests-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch40: kvm-migration-Rename-duplicate-to-zero_pages.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch41: kvm-migration-Rename-normal-to-normal_pages.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch42: kvm-migration-rename-enabled_capabilities-to-capabilitie.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch43: kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch44: kvm-migration-move-migration_global_dump-to-migration-hm.patch +# For bz#2057267 - Migration with 
postcopy fail when vm set with shared memory +Patch45: kvm-spice-move-client_migrate_info-command-to-ui.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch46: kvm-migration-Create-migrate_cap_set.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch47: kvm-migration-Create-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch48: kvm-migration-Move-migrate_colo_enabled-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch49: kvm-migration-Move-migrate_use_compression-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch50: kvm-migration-Move-migrate_use_events-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch51: kvm-migration-Move-migrate_use_multifd-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch52: kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch53: kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch54: kvm-migration-Move-migrate_use_block-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch55: kvm-migration-Move-migrate_use_return-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch56: kvm-migration-Create-migrate_rdma_pin_all-function.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch57: kvm-migration-Move-migrate_caps_check-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch58: 
kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch59: kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch60: kvm-migration-Move-migrate_cap_set-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch61: kvm-migration-Move-parameters-functions-to-option.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch62: kvm-migration-Use-migrate_max_postcopy_bandwidth.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch63: kvm-migration-Move-migrate_use_block_incremental-to-opti.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch64: kvm-migration-Create-migrate_throttle_trigger_threshold.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch65: kvm-migration-Create-migrate_checkpoint_delay.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch66: kvm-migration-Create-migrate_max_cpu_throttle.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch67: kvm-migration-Move-migrate_announce_params-to-option.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch68: kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch69: kvm-migration-Create-migrate_cpu_throttle_increment-func.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch70: kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch71: kvm-migration-Move-migrate_postcopy-to-options.c.patch +# 
For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch72: kvm-migration-Create-migrate_max_bandwidth-function.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch73: kvm-migration-Move-migrate_use_tls-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch74: kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch75: kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch76: kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch77: kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch78: kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch79: kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch80: kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch81: kvm-graph-lock-Disable-locking-for-now.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch82: kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch83: kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch84: kvm-memory-prevent-dma-reentracy-issues.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: 
DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch85: kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch86: kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch87: kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch88: kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch89: kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch90: kvm-raven-disable-reentrancy-detection-for-iomem.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch91: kvm-apic-disable-reentrancy-detection-for-apic-msi.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch92: kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch93: kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch94: kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] 
+Patch95: kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch96: kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch +# For bz#2189423 - Failed to migrate VM from rhel 9.3 to rhel 9.2 +Patch97: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch +# For bz#2196289 - Fix number of ready channels on multifd +Patch98: kvm-multifd-Fix-the-number-of-channels-ready.patch +# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part +Patch99: kvm-util-async-teardown-wire-up-query-command-line-optio.patch +# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part +Patch100: kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch +# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode +Patch101: kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch +# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode +Patch102: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch +# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers +Patch103: kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch +# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers +Patch104: kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch105: kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch106: kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch107: 
kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch +# For RHEL-330 - [virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed +Patch108: kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch +# For bz#2218644 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) +Patch109: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +# For bz#2128929 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest +Patch110: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" +# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package +Patch111: kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch +# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" +# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package +Patch112: kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch113: kvm-vfio-pci-add-support-for-VF-token.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch114: kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch115: kvm-vfio-pci-Static-Resizable-BAR-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch116: kvm-vfio-pci-Fix-a-use-after-free-issue.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch117: kvm-util-vfio-helpers-Use-g_file_read_link.patch +# For 
bz#2192818 - [VFIO LM] Live migration +Patch118: kvm-migration-Make-all-functions-check-have-the-same-for.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch119: kvm-migration-Move-migration_properties-to-options.c.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch120: kvm-migration-Add-switchover-ack-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch121: kvm-migration-Implement-switchover-ack-logic.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch122: kvm-migration-Enable-switchover-ack-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch123: kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch124: kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch125: kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch126: kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch127: kvm-vfio-Implement-a-common-device-info-helper.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch128: kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch129: kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch130: kvm-vfio-migration-Reset-bytes_transferred-properly.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch131: kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch132: kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch133: kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch134: kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch135: 
kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch136: kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch137: kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch138: kvm-vfio-migration-Remove-print-of-Migration-disabled.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch139: kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch140: kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch141: kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch142: kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch143: kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch144: kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch +# For bz#2222579 - PNG screendump doesn't save screen correctly +Patch145: kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch +# For bz#2213317 - Enable libblkio-based block drivers in QEMU +Patch146: kvm-block-blkio-fix-module_block.py-parsing.patch +# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly +Patch147: kvm-scsi-fetch-unit-attention-when-creating-the-request.patch +# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly +Patch148: kvm-scsi-cleanup-scsi_clear_unit_attention.patch +# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly +Patch149: 
kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch +# For RHEL-794 - Backport s390x fixes from QEMU 8.1 +Patch150: kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch +# For bz#2196295 - Multifd flushes its channels 10 times per second +Patch151: kvm-multifd-Create-property-multifd-flush-after-each-sec.patch +# For bz#2196295 - Multifd flushes its channels 10 times per second +Patch152: kvm-multifd-Protect-multifd_send_sync_main-calls.patch +# For bz#2196295 - Multifd flushes its channels 10 times per second +Patch153: kvm-multifd-Only-flush-once-each-full-round-of-memory.patch +# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface +Patch154: kvm-net-socket-prepare-to-cleanup-net_init_socket.patch +# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface +Patch155: kvm-net-socket-move-fd-type-checking-to-its-own-function.patch +# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface +Patch156: kvm-net-socket-remove-net_init_socket.patch +# For bz#2215819 - Migration test failed while guest with PCIe devices +Patch157: kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch158: kvm-util-iov-Make-qiov_slice-public.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch159: kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch160: kvm-util-iov-Remove-qemu_iovec_init_extended.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch161: 
kvm-iotests-iov-padding-New-test.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch162: kvm-block-Fix-pad_request-s-request-restriction.patch +# For RHEL-573 - [mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature" +Patch163: kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch +# For bz#2040509 - [RFE]:Add support for changing "tx_queue_size" to a setable value +Patch164: kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch +# For bz#2223691 - [machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2 +Patch165: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch +# For bz#2141965 - [TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region +Patch166: kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch167: kvm-block-blkio-enable-the-completion-eventfd.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch168: kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch169: kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch170: kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch +# For bz#2225354 - 
[vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch171: kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch +# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting +# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa +Patch172: kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch +# For bz#2229133 - Backport some virtio-iommu and smmu fixes +Patch173: kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch +# For bz#2229133 - Backport some virtio-iommu and smmu fixes +Patch174: kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch +# For bz#2229133 - Backport some virtio-iommu and smmu fixes +Patch175: kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch +# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes +Patch176: kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch +# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes +Patch177: kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch +# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes +Patch178: kvm-i386-sev-Update-checks-and-information-related-to-re.patch +# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes +Patch179: kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch180: kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch181: kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch182: kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch183: kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch +# For 
bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch184: kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch185: kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch +# For bz#2094913 - Add EPYC-Genoa CPU model in qemu +Patch186: kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch187: kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch188: kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch189: kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch190: kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch191: kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch192: kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch +# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ +Patch193: kvm-vdpa-remove-net-cvq-migration-blocker.patch +# For bz#2216504 - CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.3.0] +Patch194: kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch +# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device +Patch195: kvm-migration-Add-migration-prefix-to-functions-in-targe.patch +# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device +Patch196: kvm-migration-Move-more-initializations-to-migrate_init.patch +# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device +Patch197: 
kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch +# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device +Patch198: kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch %if %{have_clang} BuildRequires: clang @@ -450,6 +572,8 @@ BuildRequires: glib2-devel BuildRequires: gnutls-devel BuildRequires: cyrus-sasl-devel BuildRequires: libaio-devel +BuildRequires: libblkio-devel +BuildRequires: liburing-devel BuildRequires: python3-devel BuildRequires: libattr-devel BuildRequires: libusbx-devel >= %{libusbx_version} @@ -636,6 +760,17 @@ Install this package if you want access to the avocado_qemu tests, or qemu-iotests. +%package block-blkio +Summary: QEMU libblkio block drivers +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description block-blkio +This package provides the additional libblkio block drivers for QEMU. + +Install this package if you want to use virtio-blk-vhost-vdpa, +virtio-blk-vfio-pci, virtio-blk-vhost-user, io_uring, and nvme-io_uring block +drivers provided by libblkio. + + %package block-curl Summary: QEMU CURL block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} @@ -732,6 +867,19 @@ Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} This package provides usbredir support. %endif +%package ui-dbus +Summary: QEMU D-Bus UI driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description ui-dbus +This package provides the additional D-Bus UI for QEMU. + +%package audio-dbus +Summary: QEMU D-Bus audio driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-ui-dbus = %{epoch}:%{version}-%{release} +%description audio-dbus +This package provides the additional D-Bus audio driver for QEMU.
+ %prep %setup -q -n qemu-%{version}%{?rcstr} %autopatch -p1 @@ -752,6 +900,7 @@ ulimit -n 10240 --disable-auth-pam \\\ --disable-avx2 \\\ --disable-avx512f \\\ + --disable-avx512bw \\\ --disable-blkio \\\ --disable-block-drv-whitelist-in-tools \\\ --disable-bochs \\\ @@ -795,6 +944,7 @@ ulimit -n 10240 --disable-kvm \\\ --disable-l2tpv3 \\\ --disable-libdaxctl \\\ + --disable-libdw \\\ --disable-libiscsi \\\ --disable-libnfs \\\ --disable-libpmem \\\ @@ -866,7 +1016,6 @@ ulimit -n 10240 --disable-vhost-vdpa \\\ --disable-virglrenderer \\\ --disable-virtfs \\\ - --disable-virtiofsd \\\ --disable-vnc \\\ --disable-vnc-jpeg \\\ --disable-png \\\ @@ -901,7 +1050,7 @@ run_configure() { --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ --meson="%{__meson}" \ - --enable-trace-backend=dtrace \ + --enable-trace-backends=dtrace \ --with-coroutine=ucontext \ --with-git=git \ --tls-priority=@QEMU,SYSTEM \ @@ -928,15 +1077,18 @@ run_configure \ --block-drv-ro-whitelist=%{block_drivers_ro_list} \ %endif --enable-attr \ + --enable-blkio \ --enable-cap-ng \ --enable-capstone \ --enable-coroutine-pool \ --enable-curl \ + --enable-dbus-display \ --enable-debug-info \ --enable-docs \ %if %{have_fdt} --enable-fdt=system \ %endif + --enable-gio \ --enable-gnutls \ --enable-guest-agent \ --enable-iconv \ @@ -947,6 +1099,7 @@ run_configure \ --enable-libusb \ --enable-libudev \ --enable-linux-aio \ + --enable-linux-io-uring \ --enable-lzo \ --enable-malloc-trim \ --enable-modules \ @@ -978,7 +1131,6 @@ run_configure \ --enable-usb-redir \ %endif --enable-vdi \ - --enable-virtiofsd \ --enable-vhost-kernel \ --enable-vhost-net \ --enable-vhost-user \ @@ -1114,10 +1266,6 @@ popd mkdir -p %{buildroot}%{_datadir}/systemtap/tapset -# Move vhost-user JSON files to the standard "qemu" directory -mkdir -p %{buildroot}%{_datadir}/qemu -mv %{buildroot}%{_datadir}/%{name}/vhost-user %{buildroot}%{_datadir}/qemu/ - install -m 0755 
%{qemu_kvm_build}/%{kvm_target}-softmmu/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm install -m 0644 %{qemu_kvm_build}/qemu-kvm.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-log.stp %{buildroot}%{_datadir}/systemtap/tapset/ @@ -1133,7 +1281,6 @@ rm %{buildroot}%{_bindir}/qemu-system-%{kvm_target} rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp -rm %{buildroot}%{_bindir}/elf2dmp # Install simpletrace install -m 0755 scripts/simpletrace.py %{buildroot}%{_datadir}/%{name}/simpletrace.py @@ -1273,17 +1420,13 @@ rm -rf %{buildroot}%{qemudocdir}/specs # endif !tools_only %endif -# Remove virtiofsd (we use separate package for virtiofsd) -rm -rf %{buildroot}%{_mandir}/man1/virtiofsd.1* -rm -rf %{buildroot}%{_libexecdir}/virtiofsd -rm -rf %{buildroot}%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json - %check %if !%{tools_only} pushd %{qemu_kvm_build} echo "Testing %{name}-build" -%make_build check +#%make_build check +make V=1 check popd # endif !tools_only @@ -1349,6 +1492,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_bindir}/qemu-keymap %{_bindir}/qemu-edid %{_bindir}/qemu-trace-stap +%{_bindir}/elf2dmp %{_datadir}/%{name}/simpletrace.py* %{_datadir}/%{name}/tracetool/*.py* %{_datadir}/%{name}/tracetool/backend/*.py* @@ -1436,6 +1580,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{testsdir} %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so +%files block-blkio +%{_libdir}/%{name}/block-blkio.so + %files block-curl %{_libdir}/%{name}/block-curl.so %if %{have_block_rbd} @@ -1460,42 +1607,322 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_libdir}/%{name}/hw-usb-redirect.so %endif +%files audio-dbus +%{_libdir}/%{name}/audio-dbus.so + +%files ui-dbus +%{_libdir}/%{name}/ui-dbus.so 
+ # endif !tools_only %endif %changelog -* Mon Aug 21 2023 Miroslav Rezanina - 7.2.0-14.el9_2.5 -- kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch [RHEL-1060] -- kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch [RHEL-1060] -- kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch [RHEL-1060] -- kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch [bz#2216503] -- Resolves: RHEL-1060 - ([vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z]) -- Resolves: bz#2216503 - (CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.2.0.z]) - -* Mon Aug 07 2023 Miroslav Rezanina - 7.2.0-14.el9_2.4 -- kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch [bz#2211923] -- kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2227721] -- Resolves: bz#2211923 - ([qemu-kvm] rhel guest failed boot with multi disks on error Failed to start udev Wait for Complete Device Initialization [rhel-9.2.0.z]) -- Resolves: bz#2227721 - ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest [rhel-9.2.0.z]) - -* Tue Jul 11 2023 Miroslav Rezanina - 7.2.0-14.el9_2.3 -- kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2221219] -- Resolves: bz#2221219 - (query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) [rhel-9.2.0.z]) - -* Mon Jun 19 2023 Miroslav Rezanina - 7.2.0-14.el9_2.2 -- kvm-vdpa-stop-all-svq-on-device-deletion.patch [bz#2213864] -- Resolves: bz#2213864 - ([mlx vhost_vdpa][rhel 9.2]qemu core dump when hot unplug then hotplug a vdpa interface with multi-queue setting [rhel-9.2.0.z]) - -* Thu May 25 2023 Miroslav Rezanina - 7.2.0-14.el9_2.1 -- kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch [bz#2203745] -- Resolves: bz#2203745 - (Disk detach is 
unsuccessful while the guest is still booting [rhel-9.2.0.z]) +* Mon Sep 18 2023 Miroslav Rezanina - 8.0.0-16.el9_3 +- kvm-migration-Add-migration-prefix-to-functions-in-targe.patch [bz#2229868] +- kvm-migration-Move-more-initializations-to-migrate_init.patch [bz#2229868] +- kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch [bz#2229868] +- kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch [bz#2229868] +- Resolves: bz#2229868 + ([vfio migration]Disable postcopy for VM with migratable vfio device) + +* Fri Sep 15 2023 Miroslav Rezanina - 8.0.0-15.el9_3 +- kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch [bz#2216504] +- Resolves: bz#2216504 + (CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.3.0]) + +* Thu Aug 24 2023 Miroslav Rezanina - 8.0.0-13 +- kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch [RHEL-923] +- kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch [RHEL-923] +- kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch [RHEL-923] +- kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch [RHEL-923] +- kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch [RHEL-923] +- kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch [RHEL-923] +- kvm-vdpa-remove-net-cvq-migration-blocker.patch [RHEL-923] +- Resolves: RHEL-923 + (vhost shadow virtqueue: state restore through CVQ) + +* Mon Aug 21 2023 Miroslav Rezanina - 8.0.0-12 +- kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch [bz#2094913] +- kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch [bz#2094913] +- kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch [bz#2094913] +- kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch [bz#2094913] +- kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch [bz#2094913] +- kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch [bz#2094913] +- 
kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch [bz#2094913] +- Resolves: bz#2094913 + (Add EPYC-Genoa CPU model in qemu) + +* Mon Aug 07 2023 Miroslav Rezanina - 8.0.0-11 +- kvm-block-blkio-enable-the-completion-eventfd.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch [bz#2225354 bz#2225439] +- kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch [bz#2225354 bz#2225439] +- kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch [bz#2229133] +- kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch [bz#2229133] +- kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch [bz#2229133] +- kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch [bz#2214839] +- kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch [bz#2214839] +- kvm-i386-sev-Update-checks-and-information-related-to-re.patch [bz#2214839] +- kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch [bz#2214839] +- kvm-Provide-elf2dmp-binary-in-qemu-tools.patch [bz#2165917] +- Resolves: bz#2225354 + ([vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting) +- Resolves: bz#2225439 + ([vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa) +- Resolves: bz#2229133 + (Backport some virtio-iommu and smmu fixes) +- Resolves: bz#2214839 + ([AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes) +- Resolves: bz#2165917 + (qemu-kvm: contrib/elf2dmp: Windows Server 2022 support) + +* Mon Jul 31 2023 Miroslav Rezanina - 8.0.0-10 +- kvm-util-iov-Make-qiov_slice-public.patch [bz#2174676] +- kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch [bz#2174676] +- kvm-util-iov-Remove-qemu_iovec_init_extended.patch 
[bz#2174676] +- kvm-iotests-iov-padding-New-test.patch [bz#2174676] +- kvm-block-Fix-pad_request-s-request-restriction.patch [bz#2174676] +- kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch [RHEL-573] +- kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch [bz#2040509] +- kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch [bz#2223691] +- kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch [bz#2141965] +- Resolves: bz#2174676 + (Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9]) +- Resolves: RHEL-573 + ([mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature") +- Resolves: bz#2040509 + ([RFE]:Add support for changing "tx_queue_size" to a setable value) +- Resolves: bz#2223691 + ([machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2) +- Resolves: bz#2141965 + ([TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region) + +* Mon Jul 24 2023 Miroslav Rezanina - 8.0.0-9 +- kvm-scsi-fetch-unit-attention-when-creating-the-request.patch [bz#2176702] +- kvm-scsi-cleanup-scsi_clear_unit_attention.patch [bz#2176702] +- kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch [bz#2176702] +- kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch [RHEL-794] +- kvm-multifd-Create-property-multifd-flush-after-each-sec.patch [bz#2196295] +- kvm-multifd-Protect-multifd_send_sync_main-calls.patch [bz#2196295] +- kvm-multifd-Only-flush-once-each-full-round-of-memory.patch [bz#2196295] +- kvm-net-socket-prepare-to-cleanup-net_init_socket.patch [RHEL-582] +- kvm-net-socket-move-fd-type-checking-to-its-own-function.patch [RHEL-582] +- kvm-net-socket-remove-net_init_socket.patch [RHEL-582] +- kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch [bz#2215819] +- kvm-spec-Build-DBUS-display.patch [bz#2207940] +- Resolves: bz#2176702 + 
([RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly) +- Resolves: RHEL-794 + (Backport s390x fixes from QEMU 8.1) +- Resolves: bz#2196295 + (Multifd flushes its channels 10 times per second) +- Resolves: RHEL-582 + ([passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface) +- Resolves: bz#2215819 + (Migration test failed while guest with PCIe devices) +- Resolves: bz#2207940 + ([RFE] Enable qemu-ui-dbus subpackage) + +* Mon Jul 17 2023 Miroslav Rezanina - 8.0.0-8 +- kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch [bz#2211609 bz#2211634] +- kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch [bz#2211609 bz#2211634] +- kvm-vfio-pci-add-support-for-VF-token.patch [bz#2192818] +- kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch [bz#2192818] +- kvm-vfio-pci-Static-Resizable-BAR-capability.patch [bz#2192818] +- kvm-vfio-pci-Fix-a-use-after-free-issue.patch [bz#2192818] +- kvm-util-vfio-helpers-Use-g_file_read_link.patch [bz#2192818] +- kvm-migration-Make-all-functions-check-have-the-same-for.patch [bz#2192818] +- kvm-migration-Move-migration_properties-to-options.c.patch [bz#2192818] +- kvm-migration-Add-switchover-ack-capability.patch [bz#2192818] +- kvm-migration-Implement-switchover-ack-logic.patch [bz#2192818] +- kvm-migration-Enable-switchover-ack-capability.patch [bz#2192818] +- kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch [bz#2192818] +- kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch [bz#2192818] +- kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch [bz#2192818] +- kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch [bz#2192818] +- kvm-vfio-Implement-a-common-device-info-helper.patch [bz#2192818] +- kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch [bz#2192818] +- kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch [bz#2192818] +- 
kvm-vfio-migration-Reset-bytes_transferred-properly.patch [bz#2192818] +- kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch [bz#2192818] +- kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch [bz#2192818] +- kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch [bz#2192818] +- kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch [bz#2192818] +- kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch [bz#2192818] +- kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch [bz#2192818] +- kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch [bz#2192818] +- kvm-vfio-migration-Remove-print-of-Migration-disabled.patch [bz#2192818] +- kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch [bz#2192818] +- kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch [bz#2192818] +- kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch [bz#2220866] +- kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch [bz#2222579] +- kvm-block-blkio-fix-module_block.py-parsing.patch [bz#2213317] +- kvm-Fix-virtio-blk-vhost-vdpa-typo-in-spec-file.patch [bz#2213317] +- Resolves: bz#2211609 + (With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000") +- Resolves: bz#2211634 + ([aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package) +- Resolves: bz#2192818 + ([VFIO LM] Live migration) +- Resolves: bz#2220866 + (Misaligned symbol for s390-ccw image during qemu-kvm build) +- Resolves: bz#2222579 + (PNG screendump doesn't save screen correctly) +- Resolves: bz#2213317 + (Enable libblkio-based block drivers in QEMU) + +* Mon Jul 10 2023 Miroslav Rezanina - 8.0.0-7 +- 
kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch [bz#2171363] +- kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch [bz#2171363] +- kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch [bz#2171363] +- kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch [RHEL-330] +- kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2218644] +- kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2128929] +- Resolves: bz#2171363 + ([aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association) +- Resolves: RHEL-330 + ([virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed) +- Resolves: bz#2218644 + (query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone)) +- Resolves: bz#2128929 + ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest) + +* Mon Jun 26 2023 Miroslav Rezanina - 8.0.0-6 +- kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch [bz#2216201] +- kvm-target-i386-add-support-for-FB_CLEAR-feature.patch [bz#2216201] +- kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch [bz#2180076] +- kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch [bz#2180076] +- kvm-Enable-libblkio-block-drivers.patch [bz#2213317] +- Resolves: bz#2216201 + ([qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode) +- Resolves: bz#2180076 + ([qemu-kvm] support fd passing for libblkio QEMU BlockDrivers) +- Resolves: bz#2213317 + (Enable libblkio-based block drivers in QEMU) + +* Tue Jun 13 2023 Miroslav Rezanina - 8.0.0-5 +- kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch [bz#2186725] +- kvm-graph-lock-Disable-locking-for-now.patch [bz#2186725] +- 
kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch [bz#2186725] +- kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch [bz#2186725] +- kvm-memory-prevent-dma-reentracy-issues.patch [RHEL-516] +- kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch [RHEL-516] +- kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch [RHEL-516] +- kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch [RHEL-516] +- kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch [RHEL-516] +- kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch [RHEL-516] +- kvm-raven-disable-reentrancy-detection-for-iomem.patch [RHEL-516] +- kvm-apic-disable-reentrancy-detection-for-apic-msi.patch [RHEL-516] +- kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch [RHEL-516] +- kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch [RHEL-516] +- kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch [RHEL-516] +- kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch [RHEL-516] +- kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch [RHEL-516] +- kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch [bz#2189423] +- kvm-multifd-Fix-the-number-of-channels-ready.patch [bz#2196289] +- kvm-util-async-teardown-wire-up-query-command-line-optio.patch [bz#2168500] +- kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch [bz#2168500] +- Resolves: bz#2186725 + (Qemu hang when commit during fio running(iothread enable)) +- Resolves: RHEL-516 + (CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9]) +- Resolves: bz#2189423 + (Failed to migrate VM from rhel 9.3 to rhel 9.2) +- Resolves: bz#2196289 + (Fix number of ready channels on multifd) +- Resolves: bz#2168500 + ([IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part) + +* Mon May 22 2023 Miroslav Rezanina - 8.0.0-4 +- 
kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch [bz#2058982] +- kvm-util-mmap-alloc-qemu_fd_getfs.patch [bz#2057267] +- kvm-vl.c-Create-late-backends-before-migration-object.patch [bz#2057267] +- kvm-migration-postcopy-Detect-file-system-on-dest-host.patch [bz#2057267] +- kvm-migration-mark-mixed-functions-that-can-suspend.patch [bz#2057267] +- kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch [bz#2057267] +- kvm-migration-remove-extra-whitespace-character-for-code.patch [bz#2057267] +- kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch [bz#2057267] +- kvm-migration-Update-atomic-stats-out-of-the-mutex.patch [bz#2057267] +- kvm-migration-Make-multifd_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch [bz#2057267] +- kvm-migration-Make-precopy_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-downtime_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-dirty_sync_count-atomic.patch [bz#2057267] +- kvm-migration-Make-postcopy_requests-atomic.patch [bz#2057267] +- kvm-migration-Rename-duplicate-to-zero_pages.patch [bz#2057267] +- kvm-migration-Rename-normal-to-normal_pages.patch [bz#2057267] +- kvm-migration-rename-enabled_capabilities-to-capabilitie.patch [bz#2057267] +- kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch [bz#2057267] +- kvm-migration-move-migration_global_dump-to-migration-hm.patch [bz#2057267] +- kvm-spice-move-client_migrate_info-command-to-ui.patch [bz#2057267] +- kvm-migration-Create-migrate_cap_set.patch [bz#2057267] +- kvm-migration-Create-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_colo_enabled-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_compression-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_events-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_multifd-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch [bz#2057267] +- 
kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_block-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_return-to-options.c.patch [bz#2057267] +- kvm-migration-Create-migrate_rdma_pin_all-function.patch [bz#2057267] +- kvm-migration-Move-migrate_caps_check-to-options.c.patch [bz#2057267] +- kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch [bz#2057267] +- kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch [bz#2057267] +- kvm-migration-Move-migrate_cap_set-to-options.c.patch [bz#2057267] +- kvm-migration-Move-parameters-functions-to-option.c.patch [bz#2057267] +- kvm-migration-Use-migrate_max_postcopy_bandwidth.patch [bz#2057267] +- kvm-migration-Move-migrate_use_block_incremental-to-opti.patch [bz#2057267] +- kvm-migration-Create-migrate_throttle_trigger_threshold.patch [bz#2057267] +- kvm-migration-Create-migrate_checkpoint_delay.patch [bz#2057267] +- kvm-migration-Create-migrate_max_cpu_throttle.patch [bz#2057267] +- kvm-migration-Move-migrate_announce_params-to-option.c.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_increment-func.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch [bz#2057267] +- kvm-migration-Move-migrate_postcopy-to-options.c.patch [bz#2057267] +- kvm-migration-Create-migrate_max_bandwidth-function.patch [bz#2057267] +- kvm-migration-Move-migrate_use_tls-to-options.c.patch [bz#2057267] +- kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch [bz#2057267] +- kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch [bz#2057267] +- kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch [bz#2185688] +- kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch [bz#2185688] +- kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch [bz#2185688] +- 
kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch [bz#2185688] +- kvm-Enable-Linux-io_uring.patch [bz#1947230] +- Resolves: bz#2058982 + (Qemu core dump if cut off nfs storage during migration) +- Resolves: bz#2057267 + (Migration with postcopy fail when vm set with shared memory) +- Resolves: bz#2185688 + ([qemu-kvm] no response with QMP command block_resize) +- Resolves: bz#1947230 + (Enable QEMU support for io_uring in RHEL9) + +* Mon May 15 2023 Miroslav Rezanina - 8.0.0-3 +- kvm-migration-Handle-block-device-inactivation-failures-.patch [bz#2058982] +- kvm-migration-Minor-control-flow-simplification.patch [bz#2058982] +- Resolves: bz#2058982 + (Qemu core dump if cut off nfs storage during migration) + +* Mon May 08 2023 Miroslav Rezanina - 8.0.0-2 +- kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch [bz#2087047] +- kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch [bz#1934134] +- kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch [bz#1934134] +- Resolves: bz#2087047 + (Disk detach is unsuccessful while the guest is still booting) +- Resolves: bz#1934134 + (ACPI table limits warning when booting guest with 512 VCPUs) + +* Thu Apr 20 2023 Miroslav Rezanina - 8.0.0-1 +- Rebase to QEMU 8.0.0 +- Resolves: bz#2180898 + (Rebase to QEMU 8.0.0 for RHEL 9.3.0) * Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-14 - Rebuild for 9.2 release @@ -1518,7 +1945,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#2156876 ([virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22)) -* Wed Mar 15 2023 MSVSphere Packaging Team - 7.0.0-13 +* Wed Mar 15 2023 MSVSphere Packaging Team - 7.2.0-12 - Rebuilt for MSVSphere 9.1. 
* Sun Mar 12 2023 Miroslav Rezanina - 7.2.0-12 From e15c980fb07432564c630d2b0cdfdd8d0e93c435 Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Wed, 8 Nov 2023 16:49:08 +0300 Subject: [PATCH 2/5] import qemu-kvm-8.0.0-16.el9_3.1 --- ...-of-coroutine-context-in-virtio_load.patch | 151 ++++++++++++++++++ SPECS/qemu-kvm.spec | 9 +- 2 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 SOURCES/kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch diff --git a/SOURCES/kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch b/SOURCES/kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch new file mode 100644 index 0000000..20a99e5 --- /dev/null +++ b/SOURCES/kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch @@ -0,0 +1,151 @@ +From b99a7e5e5631af3ee806fd0d78d7c7056eb559b5 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 5 Sep 2023 16:50:02 +0200 +Subject: [PATCH] virtio: Drop out of coroutine context in virtio_load() + +RH-Author: Kevin Wolf +RH-MergeRequest: 319: virtio: Drop out of coroutine context in virtio_load() [9.3.0.z 0day] +RH-Jira: RHEL-4453 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/1] 6ae1d5a464e27bfaf892e093febcaf211a1ff5ec + +virtio_load() as a whole should run in coroutine context because it +reads from the migration stream and we don't want this to block. + +However, it calls virtio_set_features_nocheck() and devices don't +expect their .set_features callback to run in a coroutine and therefore +call functions that may not be called in coroutine context. To fix this, +drop out of coroutine context for calling virtio_set_features_nocheck(). 
+ +Without this fix, the following crash was reported: + + #0 __pthread_kill_implementation (threadid=, signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44 + #1 0x00007efc738c05d3 in __pthread_kill_internal (signo=6, threadid=) at pthread_kill.c:78 + #2 0x00007efc73873d26 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 + #3 0x00007efc738477f3 in __GI_abort () at abort.c:79 + #4 0x00007efc7384771b in __assert_fail_base (fmt=0x7efc739dbcb8 "", assertion=assertion@entry=0x560aebfbf5cf "!qemu_in_coroutine()", + file=file@entry=0x560aebfcd2d4 "../block/graph-lock.c", line=line@entry=275, function=function@entry=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at assert.c:92 + #5 0x00007efc7386ccc6 in __assert_fail (assertion=0x560aebfbf5cf "!qemu_in_coroutine()", file=0x560aebfcd2d4 "../block/graph-lock.c", line=275, + function=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at assert.c:101 + #6 0x0000560aebcd8dd6 in bdrv_register_buf () + #7 0x0000560aeb97ed97 in ram_block_added.llvm () + #8 0x0000560aebb8303f in ram_block_add.llvm () + #9 0x0000560aebb834fa in qemu_ram_alloc_internal.llvm () + #10 0x0000560aebb2ac98 in vfio_region_mmap () + #11 0x0000560aebb3ea0f in vfio_bars_register () + #12 0x0000560aebb3c628 in vfio_realize () + #13 0x0000560aeb90f0c2 in pci_qdev_realize () + #14 0x0000560aebc40305 in device_set_realized () + #15 0x0000560aebc48e07 in property_set_bool.llvm () + #16 0x0000560aebc46582 in object_property_set () + #17 0x0000560aebc4cd58 in object_property_set_qobject () + #18 0x0000560aebc46ba7 in object_property_set_bool () + #19 0x0000560aeb98b3ca in qdev_device_add_from_qdict () + #20 0x0000560aebb1fbaf in virtio_net_set_features () + #21 0x0000560aebb46b51 in virtio_set_features_nocheck () + #22 0x0000560aebb47107 in virtio_load () + #23 0x0000560aeb9ae7ce in vmstate_load_state () + #24 0x0000560aeb9d2ee9 in qemu_loadvm_state_main () + #25 0x0000560aeb9d45e1 in qemu_loadvm_state () + #26 
0x0000560aeb9bc32c in process_incoming_migration_co.llvm () + #27 0x0000560aebeace56 in coroutine_trampoline.llvm () + +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-832 +Signed-off-by: Kevin Wolf +Message-ID: <20230905145002.46391-3-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 92e2e6a867334a990f8d29f07ca34e3162fdd6ec) +Signed-off-by: Kevin Wolf +--- + hw/virtio/virtio.c | 45 ++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 40 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 98c4819fcc..0010a9a5f1 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2825,8 +2825,9 @@ static int virtio_device_put(QEMUFile *f, void *opaque, size_t size, + } + + /* A wrapper for use as a VMState .get function */ +-static int virtio_device_get(QEMUFile *f, void *opaque, size_t size, +- const VMStateField *field) ++static int coroutine_mixed_fn ++virtio_device_get(QEMUFile *f, void *opaque, size_t size, ++ const VMStateField *field) + { + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev)); +@@ -2853,6 +2854,39 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) + return bad ? 
-1 : 0; + } + ++typedef struct VirtioSetFeaturesNocheckData { ++ Coroutine *co; ++ VirtIODevice *vdev; ++ uint64_t val; ++ int ret; ++} VirtioSetFeaturesNocheckData; ++ ++static void virtio_set_features_nocheck_bh(void *opaque) ++{ ++ VirtioSetFeaturesNocheckData *data = opaque; ++ ++ data->ret = virtio_set_features_nocheck(data->vdev, data->val); ++ aio_co_wake(data->co); ++} ++ ++static int coroutine_mixed_fn ++virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val) ++{ ++ if (qemu_in_coroutine()) { ++ VirtioSetFeaturesNocheckData data = { ++ .co = qemu_coroutine_self(), ++ .vdev = vdev, ++ .val = val, ++ }; ++ aio_bh_schedule_oneshot(qemu_get_current_aio_context(), ++ virtio_set_features_nocheck_bh, &data); ++ qemu_coroutine_yield(); ++ return data.ret; ++ } else { ++ return virtio_set_features_nocheck(vdev, val); ++ } ++} ++ + int virtio_set_features(VirtIODevice *vdev, uint64_t val) + { + int ret; +@@ -2906,7 +2940,8 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, + return config_size; + } + +-int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) ++int coroutine_mixed_fn ++virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) + { + int i, ret; + int32_t config_len; +@@ -3023,14 +3058,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) + * host_features. + */ + uint64_t features64 = vdev->guest_features; +- if (virtio_set_features_nocheck(vdev, features64) < 0) { ++ if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) { + error_report("Features 0x%" PRIx64 " unsupported. " + "Allowed features: 0x%" PRIx64, + features64, vdev->host_features); + return -1; + } + } else { +- if (virtio_set_features_nocheck(vdev, features) < 0) { ++ if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) { + error_report("Features 0x%x unsupported. 
" + "Allowed features: 0x%" PRIx64, + features, vdev->host_features); +-- +2.39.3 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index f831cdd..c338d99 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 16%{?rcrel}%{?dist}%{?cc_suffix} +Release: 16%{?rcrel}%{?dist}%{?cc_suffix}.1 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -556,6 +556,8 @@ Patch196: kvm-migration-Move-more-initializations-to-migrate_init.patch Patch197: kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch # For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device Patch198: kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch +# For RHEL-4453 - qemu-kvm crashed when migrating guest with failover vf [rhel-9.3.0.z] +Patch199: kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch %if %{have_clang} BuildRequires: clang @@ -1617,6 +1619,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Oct 09 2023 Miroslav Rezanina - 8.0.0-16.el9_3.1 +- kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch [RHEL-4453] +- Resolves: RHEL-4453 + (qemu-kvm crashed when migrating guest with failover vf [rhel-9.3.0.z]) + * Mon Sep 18 2023 Miroslav Rezanina - 8.0.0-16.el9_3 - kvm-migration-Add-migration-prefix-to-functions-in-targe.patch [bz#2229868] - kvm-migration-Move-more-initializations-to-migrate_init.patch [bz#2229868] From 2aad0ff3fdcb9ec749a1405b6eae10c343639989 Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Thu, 25 Jan 2024 03:36:32 +0300 Subject: [PATCH 3/5] import qemu-kvm-8.0.0-16.el9_3.3 --- .../kvm-dump-Add-arch-cleanup-function.patch | 69 +++++++++++++++ 
...h_dump-Add-arch-cleanup-function-for.patch | 84 +++++++++++++++++++ ...p-Remove-unneeded-dump-info-function.patch | 56 +++++++++++++ SPECS/qemu-kvm.spec | 15 +++- 4 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 SOURCES/kvm-dump-Add-arch-cleanup-function.patch create mode 100644 SOURCES/kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch create mode 100644 SOURCES/kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch diff --git a/SOURCES/kvm-dump-Add-arch-cleanup-function.patch b/SOURCES/kvm-dump-Add-arch-cleanup-function.patch new file mode 100644 index 0000000..99502d5 --- /dev/null +++ b/SOURCES/kvm-dump-Add-arch-cleanup-function.patch @@ -0,0 +1,69 @@ +From d032e43c4cebdbeb279d2da9b514fa50c6ed4da3 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 21 Nov 2023 16:36:26 +0100 +Subject: [PATCH 2/3] dump: Add arch cleanup function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 325: Fix problem that secure execution guest remains in "paused" state after dump failure +RH-Jira: RHEL-16997 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] d70fcc72a69f65432f2fbfb7d864452ea37ec25d + +JIRA: https://issues.redhat.com/browse/RHEL-16997 + +commit e72629e5149aba6f44122ea6d2a803ef136a0c6b +Author: Janosch Frank +Date: Thu Nov 9 12:04:42 2023 +0000 + + dump: Add arch cleanup function + + Some architectures (s390x) need to cleanup after a failed dump to be + able to continue to run the vm. Add a cleanup function pointer and + call it if it's set. 
+ + Signed-off-by: Janosch Frank + Reviewed-by: Thomas Huth + Reviewed-by: Marc-André Lureau + Message-ID: <20231109120443.185979-3-frankja@linux.ibm.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + dump/dump.c | 4 ++++ + include/sysemu/dump-arch.h | 1 + + 2 files changed, 5 insertions(+) + +diff --git a/dump/dump.c b/dump/dump.c +index 1f1a6edcab..6a50e85f49 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -96,6 +96,10 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val) + + static int dump_cleanup(DumpState *s) + { ++ if (s->dump_info.arch_cleanup_fn) { ++ s->dump_info.arch_cleanup_fn(s); ++ } ++ + guest_phys_blocks_free(&s->guest_phys_blocks); + memory_mapping_list_free(&s->list); + close(s->fd); +diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h +index 59bbc9be38..743916e46c 100644 +--- a/include/sysemu/dump-arch.h ++++ b/include/sysemu/dump-arch.h +@@ -24,6 +24,7 @@ typedef struct ArchDumpInfo { + void (*arch_sections_add_fn)(DumpState *s); + uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff); + int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff); ++ void (*arch_cleanup_fn)(DumpState *s); + } ArchDumpInfo; + + struct GuestPhysBlockList; /* memory_mapping.h */ +-- +2.39.3 + diff --git a/SOURCES/kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch b/SOURCES/kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch new file mode 100644 index 0000000..f8f3083 --- /dev/null +++ b/SOURCES/kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch @@ -0,0 +1,84 @@ +From a84f9954b3f3607d34661b221a72677d81743a5b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 21 Nov 2023 16:36:26 +0100 +Subject: [PATCH 3/3] target/s390x/arch_dump: Add arch cleanup function for PV + dumps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 325: Fix problem that secure execution guest remains in "paused" state 
after dump failure +RH-Jira: RHEL-16997 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 0b3a9a6e992a615d96e7e9978a6b849b17ca69b6 + +JIRA: https://issues.redhat.com/browse/RHEL-16997 + +commit d12a91e0baafce7b1cbacff7cf9339eeb0011732 +Author: Janosch Frank +Date: Thu Nov 9 12:04:43 2023 +0000 + + target/s390x/arch_dump: Add arch cleanup function for PV dumps + + PV dumps block vcpu runs until dump end is reached. If there's an + error between PV dump init and PV dump end the vm will never be able + to run again. One example of such an error is insufficient disk space + for the dump file. + + Let's add a cleanup function that tries to do a dump end. The dump + completion data is discarded but there's no point in writing it to a + file anyway if there's a possibility that other PV dump data is + missing. + + Signed-off-by: Janosch Frank + Reviewed-by: Thomas Huth + Reviewed-by: Claudio Imbrenda + Reviewed-by: Marc-André Lureau + Message-ID: <20231109120443.185979-4-frankja@linux.ibm.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + target/s390x/arch_dump.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index f6af8f780a..de0b3d7d84 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -433,6 +433,22 @@ static int arch_sections_write(DumpState *s, uint8_t *buff) + return 0; + } + ++static void arch_cleanup(DumpState *s) ++{ ++ g_autofree uint8_t *buff = NULL; ++ int rc; ++ ++ if (!pv_dump_initialized) { ++ return; ++ } ++ ++ buff = g_malloc(kvm_s390_pv_dmp_get_size_completion_data()); ++ rc = kvm_s390_dump_completion_data(buff); ++ if (!rc) { ++ pv_dump_initialized = false; ++ } ++} ++ + int cpu_get_dump_info(ArchDumpInfo *info, + const struct GuestPhysBlockList *guest_phys_blocks) + { +@@ -448,6 +464,7 @@ int cpu_get_dump_info(ArchDumpInfo *info, + info->arch_sections_add_fn = *arch_sections_add; + 
info->arch_sections_write_hdr_fn = *arch_sections_write_hdr; + info->arch_sections_write_fn = *arch_sections_write; ++ info->arch_cleanup_fn = *arch_cleanup; + } + return 0; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch b/SOURCES/kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch new file mode 100644 index 0000000..df69915 --- /dev/null +++ b/SOURCES/kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch @@ -0,0 +1,56 @@ +From b7e726278fe5564ed7f1d9e9fb15b88a4dcd57ef Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 21 Nov 2023 16:36:26 +0100 +Subject: [PATCH 1/3] target/s390x/dump: Remove unneeded dump info function + pointer init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 325: Fix problem that secure execution guest remains in "paused" state after dump failure +RH-Jira: RHEL-16997 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] d7c935ffff9722d27fb47486976719d566a71810 + +JIRA: https://issues.redhat.com/browse/RHEL-16997 + +commit 816644b1219900875f47d7adf9bfb283f1b29aa0 +Author: Janosch Frank +Date: Thu Nov 9 12:04:41 2023 +0000 + + target/s390x/dump: Remove unneeded dump info function pointer init + + dump_state_prepare() now sets the function pointers to NULL so we only + need to touch them if we're going to use them. 
+ + Signed-off-by: Janosch Frank + Reviewed-by: Marc-André Lureau + Reviewed-by: Thomas Huth + Message-ID: <20231109120443.185979-2-frankja@linux.ibm.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + target/s390x/arch_dump.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index cb98f4894d..f6af8f780a 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -448,10 +448,6 @@ int cpu_get_dump_info(ArchDumpInfo *info, + info->arch_sections_add_fn = *arch_sections_add; + info->arch_sections_write_hdr_fn = *arch_sections_write_hdr; + info->arch_sections_write_fn = *arch_sections_write; +- } else { +- info->arch_sections_add_fn = NULL; +- info->arch_sections_write_hdr_fn = NULL; +- info->arch_sections_write_fn = NULL; + } + return 0; + } +-- +2.39.3 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index c338d99..88a306c 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 16%{?rcrel}%{?dist}%{?cc_suffix}.1 +Release: 16%{?rcrel}%{?dist}%{?cc_suffix}.3 # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -558,6 +558,12 @@ Patch197: kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch Patch198: kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch # For RHEL-4453 - qemu-kvm crashed when migrating guest with failover vf [rhel-9.3.0.z] Patch199: kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch +# For RHEL-16997 - RHEL9.3 - KVM : Secure execution guest remains in "paused" state, post "virsh dump" failure (qemu-kvm) [rhel-9.3.0.z] +Patch200: kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch +# For RHEL-16997 - RHEL9.3 - KVM : Secure execution guest remains in "paused" state, post "virsh dump" failure (qemu-kvm) [rhel-9.3.0.z] +Patch201: kvm-dump-Add-arch-cleanup-function.patch +# For RHEL-16997 - RHEL9.3 - KVM : Secure execution guest remains in "paused" state, post "virsh dump" failure (qemu-kvm) [rhel-9.3.0.z] +Patch202: kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch %if %{have_clang} BuildRequires: clang @@ -1619,6 +1625,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Dec 05 2023 Miroslav Rezanina - 8.0.0-16.el9_3.3 +- kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch [RHEL-16997] +- kvm-dump-Add-arch-cleanup-function.patch [RHEL-16997] +- kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch [RHEL-16997] +- Resolves: RHEL-16997 + (RHEL9.3 - KVM : Secure execution guest remains in "paused" state, post "virsh dump" failure (qemu-kvm) [rhel-9.3.0.z]) + * Mon Oct 09 2023 Miroslav Rezanina - 8.0.0-16.el9_3.1 - kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch [RHEL-4453] - Resolves: RHEL-4453 From 037c76f04cf2e0dac5ea7c702e662659f152a38f Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Wed, 1 May 2024 04:01:32 +0300 Subject: [PATCH 4/5] import qemu-kvm-8.2.0-11.el9_4 --- .gitignore | 2 +- 
.qemu-kvm.metadata | 2 +- SOURCES/0004-Initial-redhat-build.patch | 52 +- ...0005-Enable-disable-devices-for-RHEL.patch | 478 +- ...Machine-type-related-general-changes.patch | 143 +- SOURCES/0007-Add-aarch64-machine-types.patch | 368 +- SOURCES/0008-Add-ppc64-machine-types.patch | 76 +- SOURCES/0009-Add-s390x-machine-types.patch | 58 +- SOURCES/0010-Add-x86_64-machine-types.patch | 233 +- SOURCES/0011-Enable-make-check.patch | 129 +- ...mber-of-devices-that-can-be-assigned.patch | 32 +- ...Add-support-statement-to-help-output.patch | 18 +- ...documentation-instead-of-qemu-system.patch | 8 +- ...on-warning-when-opening-v2-images-rw.patch | 10 +- ....4.0-qemu-kvm-machine-type-for-aarch.patch | 44 + ...-add-usb-support-to-guest-get-fsinfo.patch | 53 - ...0017-Add-RHEL-9.2.0-compat-structure.patch | 110 - ...c-Update-x86-machine-type-compatibil.patch | 76 - .../0019-Disable-unwanted-new-devices.patch | 83 - SOURCES/README.tests | 4 +- ...vm-Compile-IOMMUFD-object-on-aarch64.patch | 37 + SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch | 37 + SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch | 37 + .../kvm-Implement-SMBIOS-type-9-v2.6.patch | 155 + ...ent-base-of-SMBIOS-type-9-descriptor.patch | 218 + ...har-socket-Fix-TLS-io-channels-sendi.patch | 60 + ...se-a-child-source-for-qio-input-sour.patch | 216 + ...-allow-repeating-hot-unplug-requests.patch | 84 - ...text_acquire-aio_context_release-a-n.patch | 60 + ...ontext_acquire-aio_context_release-A.patch | 102 + ...uivalence-between-AIO_WAIT_WHILE-and.patch | 81 + ...le-reentrancy-detection-for-apic-msi.patch | 55 - ...ional-reentrancy-guard-to-the-BH-API.patch | 231 - ...-use-after-free-on-re-entrancy-guard.patch | 70 - ...iommufd-Introduce-the-iommufd-object.patch | 476 ++ ...-Remove-check-on-number-of-backend-u.patch | 47 + .../kvm-backends-iommufd-Remove-mutex.patch | 112 + ...-disable-reentrancy-detection-for-io.patch | 57 - ...se-padded-I-O-vecs-exceeding-IOV_MAX.patch | 354 -- ...-no_coroutine_fns-in-qmp_block_resiz.patch 
| 56 - ...ix-pad_request-s-request-restriction.patch | 73 - ...end-Allow-concurrent-context-changes.patch | 104 + ...o_unref-for-calls-in-coroutine-conte.patch | 386 -- ...o-do-not-use-open-flags-in-qemu_open.patch | 74 - ...-blkio-enable-the-completion-eventfd.patch | 54 - ...-back-on-using-path-when-fd-setting-.patch | 67 - ...ck-blkio-fix-module_block.py-parsing.patch | 205 - ...-blkio_connect-in-the-drivers-functi.patch | 151 - ...y-blkio_connect-if-it-fails-using-fd.patch | 85 - ...blkio_set_int-fd-to-check-fd-support.patch | 49 - ...qemu_open-to-support-fd-passing-for-.patch | 108 - ...t-assert_bdrv_graph_readable-by-defa.patch | 121 - ...wrapper-use-qemu_get_current_aio_con.patch | 69 + ...-set-up-Linux-AIO-and-io_uring-in-th.patch | 217 + .../kvm-block-remove-AioContext-locking.patch | 4438 +++++++++++++++++ SOURCES/kvm-block-remove-bdrv_co_lock.patch | 97 + ...outdated-AioContext-locking-comments.patch | 411 ++ ...ket-Fix-TLS-io-channels-sending-too-.patch | 105 + ...iority-of-the-HUP-GSource-in-socket-.patch | 78 + ...ch-add-qemu_bh_new-aio_bh_new-checks.patch | 55 - ...utine-cap-per-thread-local-pool-size.patch | 412 ++ ...kvm-coroutine-reserve-5-000-mappings.patch | 61 + ...-don-t-lock-AioContext-in-dma_blk_cb.patch | 75 + ...d-VFIO-iommufd-backend-documentation.patch | 228 + ...e-AioContext-lock-from-IOThread-docs.patch | 98 + .../kvm-dump-Add-arch-cleanup-function.patch | 69 - ...m-graph-lock-Disable-locking-for-now.patch | 153 - ...graph-lock-remove-AioContext-locking.patch | 1190 +++++ ...et_min_alignment-to-express-32-GiB-a.patch | 94 + ...i-blobs-as-resizable-on-RHEL-pc-mach.patch | 40 - ...rning-on-acpi-table-size-to-pc-machi.patch | 101 - ...m-Activate-IOMMUFD-for-virt-machines.patch | 42 + ...idate-cluster-and-NUMA-node-boundary.patch | 60 - ...mu-Handle-big-endian-hosts-correctly.patch | 166 - ...properties-to-disable-high-memory-re.patch | 88 + SOURCES/kvm-hw-arm-virt-Fix-compats.patch | 132 + ...date-cluster-and-NUMA-node-boundary-.patch | 
41 - ...ecate-virt-rhel9.-0-2-.0-machine-typ.patch | 41 + ...86-Activate-IOMMUFD-for-q35-machines.patch | 41 + ...-smbios_set_defaults-to-machine_done.patch | 186 + ...CI_ERR_UNCOR_MASK-reg-for-machine-ty.patch | 44 - ...CI_ERR_UNCOR_MASK-register-for-machi.patch | 118 - .../kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch | 116 + ...qemu_bh_new-calls-with-qemu_bh_new_g.patch | 470 -- ...5a-Fix-reentrancy-issues-in-the-LSI-.patch | 141 - ...teration-over-global-VFIODevice-list.patch | 73 + ...i-quirks-Sanitize-capability-pointer.patch | 76 - ...ks-Support-alternate-offset-for-GPUD.patch | 110 - ...Fix-potential-OOB-access-in-virtio_i.patch | 62 - ...how-the-EBX-register-of-CPUID-0x8000.patch | 52 - ...checks-and-information-related-to-re.patch | 77 - ....h-fix-qemu_rect_init-mis-assignment.patch | 54 + ...ch-if-TLS-channel-is-closed-during-h.patch | 102 - ...-for-reset-AioContext-switches-with-.patch | 133 + ...ommit-with-iothreads-and-ongoing-I-O.patch | 144 - ...sizing-image-attached-to-an-iothread.patch | 132 - ...rnative-CPU-type-that-is-not-depreca.patch | 44 - ...ts-add-filter_qmp_generated_node_ids.patch | 49 + ...ds-stream-Use-the-right-TimeoutError.patch | 49 + .../kvm-iotests-iov-padding-New-test.patch | 186 - ...1-to-Python-for-reliable-QMP-testing.patch | 592 +++ ...outdated-AioContext-locking-comments.patch | 105 + ...-Activate-IOMMUFD-for-s390x-machines.patch | 42 + ...pu-stats-fd-to-avoid-vcpu-interrupti.patch | 160 - ...loongarch_ipi_iocsr-re-entrnacy-safe.patch | 53 - ...le-reentrancy-detection-for-MMIO-reg.patch | 70 - ...le-reentrancy-detection-for-script-R.patch | 58 - ...reintroduce-memory-region-size-check.patch | 112 + ...-memory-prevent-dma-reentracy-issues.patch | 150 - ...checks-prior-to-unsetting-engaged_in.patch | 67 - ...ave_prepare-handler-to-struct-SaveVM.patch | 186 - ...gration-prefix-to-functions-in-targe.patch | 139 - ...ration-Add-switchover-ack-capability.patch | 162 - ...postcopy_ram_supported_by_host-to-re.patch | 308 -- 
...t-disk-reactivation-in-more-failure-.patch | 111 - ...kvm-migration-Create-migrate_cap_set.patch | 93 - ...tion-Create-migrate_checkpoint_delay.patch | 84 - ...-migrate_cpu_throttle_increment-func.patch | 75 - ...-migrate_cpu_throttle_initial-to-opt.patch | 75 - ...-migrate_cpu_throttle_tailslow-funct.patch | 78 - ...reate-migrate_max_bandwidth-function.patch | 232 - ...tion-Create-migrate_max_cpu_throttle.patch | 88 - ...Create-migrate_rdma_pin_all-function.patch | 95 - ...e-migrate_throttle_trigger_threshold.patch | 75 - SOURCES/kvm-migration-Create-options.c.patch | 524 -- ...ion-Enable-switchover-ack-capability.patch | 56 - ...-block-device-inactivation-failures-.patch | 116 - ...ation-Implement-switchover-ack-logic.patch | 339 -- ...ll-functions-check-have-the-same-for.patch | 431 -- ...gration-Make-dirty_sync_count-atomic.patch | 105 - ...e-dirty_sync_missed_zero_copy-atomic.patch | 92 - ...migration-Make-downtime_bytes-atomic.patch | 68 - ...-migration-Make-multifd_bytes-atomic.patch | 99 - ...ration-Make-postcopy_requests-atomic.patch | 69 - ...-migration-Make-precopy_bytes-atomic.patch | 68 - ...ram_counters-and-ram_atomic_counters.patch | 270 - ...on-Minor-control-flow-simplification.patch | 52 - ...-migrate_announce_params-to-option.c.patch | 90 - ...on-Move-migrate_cap_set-to-options.c.patch | 110 - ...Move-migrate_caps_check-to-options.c.patch | 458 -- ...ve-migrate_colo_enabled-to-options.c.patch | 136 - ...n-Move-migrate_postcopy-to-options.c.patch | 98 - ...-Move-migrate_use_block-to-options.c.patch | 134 - ...igrate_use_block_incremental-to-opti.patch | 121 - ...migrate_use_compression-to-options.c.patch | 183 - ...Move-migrate_use_events-to-options.c.patch | 120 - ...ove-migrate_use_multifd-to-options.c.patch | 247 - ...Move-migrate_use_return-to-options.c.patch | 138 - ...on-Move-migrate_use_tls-to-options.c.patch | 134 - ...Move-migrate_use_xbzrle-to-options.c.patch | 156 - ...igrate_use_zero_copy_send-to-options.patch | 167 - 
...ve-migration_properties-to-options.c.patch | 409 -- ...more-initializations-to-migrate_init.patch | 94 - ...ove-parameters-functions-to-option.c.patch | 317 -- ...mp_migrate_set_capabilities-to-optio.patch | 100 - ...mp_migrate_set_parameters-to-options.patch | 943 ---- ...mp_query_migrate_capabilities-to-opt.patch | 100 - ...igrate_caps_check-the-old-and-new-ca.patch | 226 - ...ation-Rename-duplicate-to-zero_pages.patch | 109 - ...ration-Rename-normal-to-normal_pages.patch | 109 - ...Update-atomic-stats-out-of-the-mutex.patch | 52 - ...n-Use-migrate_max_postcopy_bandwidth.patch | 40 - ...ark-mixed-functions-that-can-suspend.patch | 153 - ...igration_global_dump-to-migration-hm.patch | 121 - ...copy-Detect-file-system-on-dest-host.patch | 117 - ...-extra-whitespace-character-for-code.patch | 44 - ...-enabled_capabilities-to-capabilitie.patch | 329 -- ...all-job_pause_point-under-graph-lock.patch | 90 + ...-coroutine-commands-in-qemu_aio_cont.patch | 1630 ++++++ ...roperty-multifd-flush-after-each-sec.patch | 127 - ...ifd-Fix-the-number-of-channels-ready.patch | 58 - ...flush-once-each-full-round-of-memory.patch | 166 - ...Protect-multifd_send_sync_main-calls.patch | 78 - ...rained_poll-to-wake-coroutine-in-rig.patch | 159 - ...rver-Fix-race-in-draining-the-export.patch | 95 + ...id-per-NBDRequest-nbd_client_get-put.patch | 53 + ...duce-NBDClient-lock-to-protect-field.patch | 373 ++ ...traverse-NBDExport-clients-from-main.patch | 176 + ...fd-type-checking-to-its-own-function.patch | 78 - ...t-prepare-to-cleanup-net_init_socket.patch | 60 - ...vm-net-socket-remove-net_init_socket.patch | 102 - ...uster-and-NUMA-node-boundary-if-requ.patch | 145 - ...-Don-t-use-__bss_start-with-the-larl.patch | 78 - ...-s390-ccw-Fix-indentation-in-start.S.patch | 218 - ...-Makefile-Use-z-noexecstack-to-silen.patch | 50 - ...-Provide-space-for-initial-stack-fra.patch | 59 - ...S-entry-point-type-to-auto-by-defaul.patch | 115 + ...manufacturer-product-version-to-matc.patch | 44 + 
...tplug-detect-state-register-to-cmask.patch | 87 - ...tcopy-ram-do-not-use-qatomic_mb_read.patch | 42 - ...eature-for-BlockdevOptionsVirtioBlkV.patch | 79 - ...ange-the-reduced-phys-bits-value-fro.patch | 50 - ...adVirtQueueMappingList-property-type.patch | 167 + ...es-alias-all-object-class-properties.patch | 85 + ...Update-the-reduced-phys-bits-documen.patch | 60 - ...-increase-NOFILE-soft-limit-on-POSIX.patch | 135 + ...sable-reentrancy-detection-for-iomem.patch | 54 - ...-the-device-request-notifier-interfa.patch | 220 - ...i-avoid-double-enable-disable-of-aif.patch | 106 + ...drive-ISM-reset-from-subsystem-reset.patch | 137 + ...-pci-refresh-fh-before-disabling-aif.patch | 71 + ...rious-warning-with-asynchronous-tear.patch | 129 - SOURCES/kvm-scsi-Await-request-purging.patch | 124 + ...-callbacks-run-in-the-correct-AioCon.patch | 88 + ...si-cleanup-scsi_clear_unit_attention.patch | 81 - ...attention-only-for-REPORT-LUNS-comma.patch | 110 - ...n-t-lock-AioContext-in-I-O-code-path.patch | 245 + ...-attention-when-creating-the-request.patch | 132 - ...-SCSIDevice-requests-from-one-thread.patch | 307 ++ .../kvm-scsi-remove-AioContext-locking.patch | 280 ++ ...ove-outdated-AioContext-lock-comment.patch | 41 + ...-add-smbios_add_usr_blob_size-helper.patch | 62 + ...-avoid-mangling-user-provided-tables.patch | 309 ++ ...legacy-mode-code-only-for-pc-machine.patch | 517 ++ ...mbios_get_tables-from-legacy-handlin.patch | 65 + ...ios_type4_count-before-building-tabl.patch | 38 + ...heck-type4-structures-in-legacy-mode.patch | 133 + ...-when-building-type-4-table-is-not-p.patch | 72 + ...bios-entry-point-type-with-auto-valu.patch | 48 + ...ios-get-rid-of-global-smbios_ep_type.patch | 281 ++ ...bios-get-rid-of-smbios_legacy-global.patch | 198 + ...get-rid-of-smbios_smp_sockets-global.patch | 134 + ...vm-smbios-handle-errors-consistently.patch | 217 + ...f-entry-point-is-auto-try-to-build-v.patch | 131 + ...pose-structures-bitmaps-used-by-both.patch | 330 ++ 
...ve-client_migrate_info-command-to-ui.patch | 248 - ...t-visitor-Fix-pseudo-struct-handling.patch | 190 + ...tput-visitor-show-structs-as-omitted.patch | 90 + ...EPYC-Genoa-model-to-support-Zen-4-pr.patch | 203 - ...VNMI-and-automatic-IBRS-feature-bits.patch | 105 - ...a-couple-of-feature-bits-in-8000_000.patch | 94 - ...feature-bits-for-CPUID_Fn80000021_EA.patch | 126 - ...missing-feature-bits-in-EPYC-Milan-m.patch | 152 - ...new-EPYC-CPU-versions-with-updated-c.patch | 192 - ...386-add-support-for-FB_CLEAR-feature.patch | 71 - ...86-add-support-for-FLUSH_L1D-feature.patch | 70 - ...w-versioned-CPUs-to-specify-new-cach.patch | 116 - ...h_dump-Add-arch-cleanup-function-for.patch | 84 - ...p-Remove-unneeded-dump-info-function.patch | 56 - ...-pv-Provide-some-more-useful-informa.patch | 205 + ...sts-remove-aio_context_acquire-tests.patch | 125 + ...test-replication-timeout-to-60-secon.patch | 46 + ...our-channel-order-for-PNG-screenshot.patch | 88 - ...d-add-asserts-for-update-and-request.patch | 81 + ...k-type-as-not-available-when-there-i.patch | 107 + ...own-wire-up-query-command-line-optio.patch | 180 - SOURCES/kvm-util-char_dev-Add-open_cdev.patch | 175 + .../kvm-util-iov-Make-qiov_slice-public.patch | 97 - ...-iov-Remove-qemu_iovec_init_extended.patch | 156 - .../kvm-util-mmap-alloc-qemu_fd_getfs.patch | 95 - ...il-vfio-helpers-Use-g_file_read_link.patch | 82 - ...k-migration-if-device-has-cvq-and-x-.patch | 61 - ...pa-export-vhost_vdpa_set_vring_ready.patch | 105 - ...olation-check-to-net_init_vhost_vdpa.patch | 286 -- ...t_vdpa_set_vring_ready-to-the-caller.patch | 134 - ...dpa-remove-net-cvq-migration-blocker.patch | 51 - ...t_vdpa_net_load-to-vhost_vdpa_net_cv.patch | 49 - ...o-in-vhost_vdpa_get_vring_group-erro.patch | 67 - ...irst-queue-SVQ-state-for-CVQ-default.patch | 46 - ...inter-dereference-bug-in-vfio_bars_f.patch | 72 - ...mplement-a-common-device-info-helper.patch | 196 - ...-helper-function-to-initialize-VFIOD.patch | 154 + 
...ase-object-for-VFIOContainer-and-tar.patch | 129 + ...ntainerBase-poiner-parameter-const-i.patch | 276 + ...e-selection-of-a-given-iommu-backend.patch | 75 + ...o-cdev-pre-openable-by-passing-a-fil.patch | 87 + ...ODevice-initializations-in-vfio_ap_i.patch | 81 + ...he-selection-of-a-given-iommu-backen.patch | 79 + ...io-cdev-pre-openable-by-passing-a-fi.patch | 93 + ...IODevice-initializations-in-vfio_ccw.patch | 85 + ...oduce-vfio_container_init-destroy-he.patch | 98 + ...n-Move-giommu_list-in-base-container.patch | 221 + ...on-return-early-if-space-isn-t-empty.patch | 55 + ...-Convert-functions-to-base-container.patch | 257 + ...ainer-Implement-attach-detach_device.patch | 97 + ...nitialize-VFIOIOMMUOps-under-vfio_in.patch | 65 + ...ntoduce-a-new-VFIOIOMMUClass-setup-h.patch | 55 + ...-Introduce-a-VFIOIOMMU-QOM-interface.patch | 143 + ...ntroduce-a-VFIOIOMMU-legacy-QOM-inte.patch | 168 + ...ainer-Introduce-a-empty-VFIOIOMMUOps.patch | 71 + ...ntroduce-vfio_legacy_setup-for-furth.patch | 118 + ...ove-dirty_pgsizes-and-max_dirty_bitm.patch | 102 + ...r-Move-iova_ranges-to-base-container.patch | 168 + ...iner-Move-listener-to-base-container.patch | 522 ++ ...ove-per-container-device-list-in-bas.patch | 230 + ...ove-pgsizes-and-dma_max_mappings-to-.patch | 242 + ...r-Move-space-field-to-base-container.patch | 265 + ...ner-Move-vrdl_list-to-base-container.patch | 255 + ...ename-vfio_init_container-to-vfio_se.patch | 66 + ...eplace-basename-with-g_path_get_base.patch | 59 + ...witch-to-IOMMU-BE-set_dirty_page_tra.patch | 235 + ...ontainer-Switch-to-dma_map-unmap-API.patch | 303 ++ ...-support-for-iova_ranges-and-pgsizes.patch | 115 + ...ble-pci-hot-reset-through-iommufd-cd.patch | 215 + ...ommufd-Implement-the-iommufd-backend.patch | 561 +++ ...roduce-a-VFIOIOMMU-iommufd-QOM-inter.patch | 155 + ...lax-assert-check-for-iommufd-backend.patch | 71 + ...-iommufd-Remove-CONFIG_IOMMUFD-usage.patch | 55 + ...ove-the-use-of-stat-to-check-file-ex.patch | 56 + 
...-Add-VFIO-migration-pre-copy-support.patch | 438 -- ...dd-helper-function-to-set-state-or-r.patch | 115 + ...dd-support-for-switchover-ack-capabi.patch | 192 - ...lock-VFIO-migration-with-postcopy-mi.patch | 90 - ...hange-vIOMMU-blocker-from-global-to-.patch | 171 - ...ree-resources-when-vfio_migration_re.patch | 145 - ...Make-VFIO-migration-non-experimental.patch | 283 -- ...efactor-vfio_save_block-to-return-sa.patch | 102 - ...n-Remove-print-of-Migration-disabled.patch | 56 - ...ion-Reset-bytes_transferred-properly.patch | 165 - ...eturn-bool-type-for-vfio_migration_r.patch | 125 - ...kip-log_sync-during-migration-SETUP-.patch | 68 - ...tore-VFIO-migration-flags-in-VFIOMig.patch | 70 - ...he-selection-of-a-given-iommu-backen.patch | 81 + ...io_prepare_kvm_msi_virq_batch-in-MSI.patch | 67 - ...fio-pci-Clear-MSI-X-IRQ-index-always.patch | 69 + ...able-INTx-in-vfio_realize-error-path.patch | 54 - ...-out-a-helper-vfio_pci_get_pci_hot_r.patch | 139 + ...o-pci-Fix-a-segfault-in-vfio_realize.patch | 67 - ...-vfio-pci-Fix-a-use-after-free-issue.patch | 56 - ...aked-timer-in-vfio_realize-error-pat.patch | 55 - ...oduce-a-vfio-pci-hot-reset-interface.patch | 466 ++ ...io-cdev-pre-openable-by-passing-a-fi.patch | 237 + ...IODevice-initializations-in-vfio_ins.patch | 70 + ...-pci-Static-Resizable-BAR-capability.patch | 141 - ...vm-vfio-pci-add-support-for-VF-token.patch | 104 - ...low-the-selection-of-a-given-iommu-b.patch | 77 + ...ke-vfio-cdev-pre-openable-by-passing.patch | 108 + ...ve-VFIODevice-initializations-in-vfi.patch | 64 + ...d-VFIOIOMMUOps-with-a-release-handle.patch | 129 + ...duce-a-sPAPR-VFIOIOMMU-QOM-interface.patch | 150 + ...duce-spapr-backend-and-target-interf.patch | 91 + ...ve-hostwin_list-into-spapr-container.patch | 188 + ...prereg_listener-into-spapr-container.patch | 120 + ...compile-sPAPR-IOMMU-support-when-nee.patch | 46 + ...h-to-spapr-IOMMU-BE-add-del_section_.patch | 184 + ...host_dev_enable_notifiers-error-case.patch | 138 - 
...t-cleanup-the-vdpa-vhost-net-structu.patch | 67 - ...a-mute-unaligned-memory-error-report.patch | 86 - ...-of-coroutine-context-in-virtio_load.patch | 151 - ...-Re-enable-notifications-after-drain.patch | 139 + ...otential-nullpointer-read-access-in-.patch | 47 + ...-ioeventfd_attach-in-start_ioeventfd.patch | 75 + ...lk-add-iothread-vq-mapping-parameter.patch | 464 ++ ...-virtio-blk-add-lock-to-protect-s-rq.patch | 177 + ...-always-set-ioeventfd-during-startup.patch | 63 + ...-using-ioeventfd-state-in-irqfd-cond.patch | 72 + ...-lock-AioContext-in-the-completion-c.patch | 167 + ...-lock-AioContext-in-the-submission-c.patch | 67 + ...ove-dataplane-code-into-virtio-blk.c.patch | 1009 ++++ ...e-dataplane-create-destroy-functions.patch | 117 + ...io-blk-rename-dataplane-to-ioeventfd.patch | 307 ++ ...-restart-s-rq-reqs-in-vq-AioContexts.patch | 106 + ...ate-failure-to-set-BlockBackend-AioC.patch | 72 + ...lock-migration-of-VMs-with-blob-true.patch | 87 + ...-64kB-host-page-size-VFIO-device-ass.patch | 151 - ...ork-the-traces-in-virtio_iommu_set_p.patch | 83 - ...ndardize-granule-extraction-and-form.patch | 88 - ...-mem-default-enable-dynamic-memslots.patch | 70 + ...ctly-report-maximum-tx_queue_size-va.patch | 92 - ...ttach-event-vq-notifier-with-no_poll.patch | 78 + ...t-lock-AioContext-around-virtio_queu.patch | 58 + ...ace-AioContext-lock-with-tmf_bh_lock.patch | 173 + ...ate-backends-before-migration-object.patch | 58 - ...6-rhel-9.2.0-machine-type-compat-fix.patch | 48 + SOURCES/qemu-ga.sysconfig | 12 +- SOURCES/qemu-guest-agent.service | 2 +- SPECS/qemu-kvm.spec | 1150 +++-- 367 files changed, 32549 insertions(+), 25485 deletions(-) create mode 100644 SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch delete mode 100644 SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch delete mode 100644 SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch delete mode 100644 
SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch delete mode 100644 SOURCES/0019-Disable-unwanted-new-devices.patch create mode 100644 SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch create mode 100644 SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch create mode 100644 SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch create mode 100644 SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch create mode 100644 SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch create mode 100644 SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch create mode 100644 SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch delete mode 100644 SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch create mode 100644 SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch create mode 100644 SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch create mode 100644 SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch delete mode 100644 SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch delete mode 100644 SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch delete mode 100644 SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch create mode 100644 SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch create mode 100644 SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch create mode 100644 SOURCES/kvm-backends-iommufd-Remove-mutex.patch delete mode 100644 SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch delete mode 100644 SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch delete mode 100644 SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch delete mode 100644 SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch create mode 100644 SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch delete mode 100644 
SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch delete mode 100644 SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch delete mode 100644 SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch delete mode 100644 SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch delete mode 100644 SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch delete mode 100644 SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch delete mode 100644 SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch delete mode 100644 SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch delete mode 100644 SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch delete mode 100644 SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch create mode 100644 SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch create mode 100644 SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch create mode 100644 SOURCES/kvm-block-remove-AioContext-locking.patch create mode 100644 SOURCES/kvm-block-remove-bdrv_co_lock.patch create mode 100644 SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch create mode 100644 SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch create mode 100644 SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch delete mode 100644 SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch create mode 100644 SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch create mode 100644 SOURCES/kvm-coroutine-reserve-5-000-mappings.patch create mode 100644 SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch create mode 100644 SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch create mode 100644 SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch delete mode 100644 SOURCES/kvm-dump-Add-arch-cleanup-function.patch delete mode 
100644 SOURCES/kvm-graph-lock-Disable-locking-for-now.patch create mode 100644 SOURCES/kvm-graph-lock-remove-AioContext-locking.patch create mode 100644 SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch delete mode 100644 SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch delete mode 100644 SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch create mode 100644 SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch delete mode 100644 SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch delete mode 100644 SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Fix-compats.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch create mode 100644 SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch create mode 100644 SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch create mode 100644 SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch delete mode 100644 SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch delete mode 100644 SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch create mode 100644 SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch delete mode 100644 SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch delete mode 100644 SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch create mode 100644 SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch delete mode 100644 SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch delete mode 100644 SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch delete mode 100644 SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch delete mode 100644 
SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch delete mode 100644 SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch create mode 100644 SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch delete mode 100644 SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch create mode 100644 SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch delete mode 100644 SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch delete mode 100644 SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch delete mode 100644 SOURCES/kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch create mode 100644 SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch create mode 100644 SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch delete mode 100644 SOURCES/kvm-iotests-iov-padding-New-test.patch create mode 100644 SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch create mode 100644 SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch create mode 100644 SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch delete mode 100644 SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch delete mode 100644 SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch delete mode 100644 SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch delete mode 100644 SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch create mode 100644 SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch delete mode 100644 SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch delete mode 100644 SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch delete mode 100644 SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch delete mode 100644 SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch delete mode 100644 
SOURCES/kvm-migration-Add-switchover-ack-capability.patch delete mode 100644 SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch delete mode 100644 SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_cap_set.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch delete mode 100644 SOURCES/kvm-migration-Create-options.c.patch delete mode 100644 SOURCES/kvm-migration-Enable-switchover-ack-capability.patch delete mode 100644 SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch delete mode 100644 SOURCES/kvm-migration-Implement-switchover-ack-logic.patch delete mode 100644 SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch delete mode 100644 SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch delete mode 100644 SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch delete mode 100644 
SOURCES/kvm-migration-Minor-control-flow-simplification.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch delete mode 100644 SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch delete mode 100644 SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch delete mode 100644 SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch delete mode 100644 SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch delete mode 100644 SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch delete mode 100644 SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch delete mode 100644 SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch delete mode 100644 
SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch delete mode 100644 SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch delete mode 100644 SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch delete mode 100644 SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch delete mode 100644 SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch delete mode 100644 SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch delete mode 100644 SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch delete mode 100644 SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch create mode 100644 SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch create mode 100644 SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch delete mode 100644 SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch delete mode 100644 SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch delete mode 100644 SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch delete mode 100644 SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch delete mode 100644 SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch create mode 100644 SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch create mode 100644 SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch create mode 100644 SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch create mode 100644 SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch delete mode 100644 SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch delete mode 100644 SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch delete mode 100644 SOURCES/kvm-net-socket-remove-net_init_socket.patch delete mode 100644 SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch delete mode 100644 
SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch delete mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch delete mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch delete mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch create mode 100644 SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch create mode 100644 SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch delete mode 100644 SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch delete mode 100644 SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch delete mode 100644 SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch delete mode 100644 SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch create mode 100644 SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch create mode 100644 SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch delete mode 100644 SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch create mode 100644 SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch delete mode 100644 SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch delete mode 100644 SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch create mode 100644 SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch create mode 100644 SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch create mode 100644 SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch delete mode 100644 SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch create mode 100644 SOURCES/kvm-scsi-Await-request-purging.patch create mode 100644 SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch delete mode 100644 SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch delete mode 100644 
SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch create mode 100644 SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch delete mode 100644 SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch create mode 100644 SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch create mode 100644 SOURCES/kvm-scsi-remove-AioContext-locking.patch create mode 100644 SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch create mode 100644 SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch create mode 100644 SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch create mode 100644 SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch create mode 100644 SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch create mode 100644 SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch create mode 100644 SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch create mode 100644 SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch create mode 100644 SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch create mode 100644 SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch create mode 100644 SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch create mode 100644 SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch create mode 100644 SOURCES/kvm-smbios-handle-errors-consistently.patch create mode 100644 SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch create mode 100644 SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch delete mode 100644 SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch create mode 100644 SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch create mode 100644 SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch delete mode 100644 
SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch delete mode 100644 SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch delete mode 100644 SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch delete mode 100644 SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch delete mode 100644 SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch delete mode 100644 SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch delete mode 100644 SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch delete mode 100644 SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch delete mode 100644 SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch delete mode 100644 SOURCES/kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch delete mode 100644 SOURCES/kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch create mode 100644 SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch create mode 100644 SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch create mode 100644 SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch delete mode 100644 SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch create mode 100644 SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch create mode 100644 SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch delete mode 100644 SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch create mode 100644 SOURCES/kvm-util-char_dev-Add-open_cdev.patch delete mode 100644 SOURCES/kvm-util-iov-Make-qiov_slice-public.patch delete mode 100644 SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch delete mode 100644 SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch delete mode 100644 SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch delete mode 100644 
SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch delete mode 100644 SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch delete mode 100644 SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch delete mode 100644 SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch delete mode 100644 SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch delete mode 100644 SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch delete mode 100644 SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch delete mode 100644 SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch delete mode 100644 SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch delete mode 100644 SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch create mode 100644 SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch create mode 100644 SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch create mode 100644 SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch create mode 100644 SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch create mode 100644 SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch create mode 100644 SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch create mode 100644 SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch create mode 100644 SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch create mode 100644 SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch create mode 100644 SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch create mode 100644 SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch create mode 100644 SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch create mode 100644 SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch create 
mode 100644 SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch create mode 100644 SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch create mode 100644 SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch create mode 100644 SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch create mode 100644 SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch create mode 100644 SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch create mode 100644 SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch create mode 100644 SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch create mode 100644 SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch create mode 100644 SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch create mode 100644 SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch create mode 100644 SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch create mode 100644 SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch create mode 100644 SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch create mode 100644 
SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch delete mode 100644 SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch create mode 100644 SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch delete mode 100644 SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch delete mode 100644 SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch delete mode 100644 SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch delete mode 100644 SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch delete mode 100644 SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch delete mode 100644 SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch delete mode 100644 SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch delete mode 100644 SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch delete mode 100644 SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch delete mode 100644 SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch delete mode 100644 SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch create mode 100644 SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch delete mode 100644 SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch create mode 100644 SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch delete mode 100644 SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch create mode 100644 SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch delete mode 100644 SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch delete mode 100644 SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch delete mode 100644 
SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch create mode 100644 SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch create mode 100644 SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch create mode 100644 SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch delete mode 100644 SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch delete mode 100644 SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch create mode 100644 SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch create mode 100644 SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch create mode 100644 SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch create mode 100644 SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch create mode 100644 SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch create mode 100644 SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch create mode 100644 SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch create mode 100644 SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch create mode 100644 SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch create mode 100644 SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch delete mode 100644 SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch delete mode 100644 SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch delete mode 100644 SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch delete mode 100644 SOURCES/kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch create mode 100644 SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch create mode 100644 SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch create mode 100644 SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch create 
mode 100644 SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch create mode 100644 SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch create mode 100644 SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch create mode 100644 SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch create mode 100644 SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch create mode 100644 SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch create mode 100644 SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch create mode 100644 SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch create mode 100644 SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch create mode 100644 SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch create mode 100644 SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch create mode 100644 SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch delete mode 100644 SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch delete mode 100644 SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch delete mode 100644 SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch create mode 100644 SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch delete mode 100644 SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch create mode 100644 SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch create mode 100644 SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch create mode 100644 SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch delete mode 100644 SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch create mode 100644 SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch diff --git a/.gitignore b/.gitignore index 7dc73be..ded41b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ 
-SOURCES/qemu-8.0.0.tar.xz +SOURCES/qemu-8.2.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata index a158c44..4a22f24 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz +1615e59b1bd68324e0819245fe003e33c14a52f9 SOURCES/qemu-8.2.0.tar.xz diff --git a/SOURCES/0004-Initial-redhat-build.patch b/SOURCES/0004-Initial-redhat-build.patch index 612633e..a63b5c3 100644 --- a/SOURCES/0004-Initial-redhat-build.patch +++ b/SOURCES/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001 +From faae70a870156f86a5cf55ca967b15d7612941ff Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. -This rebase is based on qemu-kvm-7.2.0-14.el9 +This rebase is based on qemu-kvm-8.1.0-5.el9 Signed-off-by: Miroslav Rezanina -- @@ -50,32 +50,39 @@ Rebase changes (7.0.0): - Change permissions on installing tests/Makefile.include - Remove ssh block driver -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - --disable-vnc-png renamed to --disable-png (upstream) - removed --disable-vhost-vsock and --disable-vhost-scsi - capstone submodule removed - Temporary include capstone build -Rebase changes (7.2.0 rc0): +Rebase changes (7.2.0): - Switch --enable-slirp=system to --enable-slirp - -Rebaes changes (7.2.0 rc2): - Added new configure options (blkio and sndio, both disabled) Rebase changes (7.2.0): - Fix SRPM name generation to work on Fedora 37 - Switch back to system meson -Rebase changes (8.0.0-rc1): +Rebase changes (8.0.0): - use enable-dtrace-backands instead of enable-dtrace-backend - Removed qemu virtiofsd bits - -Rebase changes (8.0.0-rc2): - test/check-block.sh removed (upstream) - -Rebase changes (8.0.0-rc3): - Add new --disable-* options for 
configure +Rebase changes (8.1.0): +- qmp-spec.txt installed by make +- Removed --meson configure option +- Add --disable-pypi +- Removed --with-git and -with-gitsubmodules +- Renamed --disable-pypi to --disable-downloads +- Minor updates in README.tests + +Rebase changes (8.2.0): +- Removed --disable-hax (upstream) +- Added --disable-plugins configure option +- Fixing frh.py strings + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -168,24 +175,35 @@ Merged patches (7.0.0): - d46d2710b2 spec: Obsolete old usb redir subpackage - 6f52a50b68 spec: Obsolete ssh driver -Merged patches (7.2.0 rc4): +Merged patches (7.2.0): - 8c6834feb6 Remove opengl display device subpackages (C9S MR 124) - 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 7754f6ba78 Minor packaging fixes - 401af56187 spec: Disable VDUSE +Merged patches (8.1.0): +- 0c2306676f Enable Linux io_uring +- b7fa6426d5 Enable libblkio block drivers +- 19f6d7a6f4 Fix virtio-blk-vhost-vdpa typo in spec file +- f356cae88f spec: Build DBUS display +- 77b763efd5 Provide elf2dmp binary in qemu-tools + +Merged patches (8.2.0): +- cd9efa221d Enable qemu-kvm-device-usb-redirec for aarch64 + Signed-off-by: Miroslav Rezanina --- .distro/Makefile | 100 + - .distro/Makefile.common | 41 + + .distro/Makefile.common | 42 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4909 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + + .distro/scripts/frh.py | 4 +- .distro/scripts/process-patches.sh | 4 + .gitignore | 1 + README.systemtap | 43 + @@ -193,7 +211,7 @@ Signed-off-by: Miroslav Rezanina scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + 
ui/vnc-auth-sasl.c | 2 +- - 15 files changed, 4784 insertions(+), 4 deletions(-) + 16 files changed, 5168 insertions(+), 6 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests @@ -296,5 +314,5 @@ index 47fdae5b21..2a950caa2a 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.39.1 +2.39.3 diff --git a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch index 14dd3f9..97c53b4 100644 --- a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001 +From 048067b4618ba1fa7c8c517185d4cd3a675eba72 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ -22,21 +22,31 @@ Rebase notes (7.0.0): - Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG - Removed upstream devices -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - Added CONFIG_VHOST_VSOCK and CONFIG_VHOST_USER_VSOCK configs - Added CONFIG_CXL and CONFIG_CXL_MEM_DEVICE for aarch64 and x86_64 - -Rebase notes (7.1.0 rc3): - Added CONFIG_VHOST_USER_FS option (all archs) -Rebase notes (7.2.0 rc20): +Rebase notes (7.2.0): - Removed disabling a15mpcore.c as no longer needed -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9 - Inlude qemu/error-report.h in hw/display/cirrus_vga.c - Change virtiofsd dependency version +Rebase notes (8.1.0): +- Added CONFIG_PCIE_PCI_BRIDGE for x86_64 +- Disabling tcg cpus for aarch64 +- Disable CONFIG_ARM_V7M and remove related hack +- Moved aarch64 tcg cpu disabling from arm machine type commit + +Rebase notes (8.2.0): +- Disabled new a710 arm64 tcg cpu +- No longer needed hack for removal of i2c-echo +- Disable new neoverse-v2 +- Removed CONFIG_OPENGL from x86_64 
config file + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -53,32 +63,47 @@ Merged patches (7.0.0): - fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64 - c9e68ea451 Enable SGX -- RH Only -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/ich9.c chunk) - 8f663466c6 configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM - 1bf372717a Enable virtio-iommu-pci on aarch64 - ae3f269458 Enable virtio-iommu-pci on x86_64 + +Merged patches (8.1.0): +- 8173d2eaba Disable unwanted new devices + +Merged patches (8.2.0): +- b29f66431f Enable igb on x86_64 --- .distro/qemu-kvm.spec.template | 18 +-- .../aarch64-softmmu/aarch64-rh-devices.mak | 41 +++++++ .../ppc64-softmmu/ppc64-rh-devices.mak | 37 ++++++ configs/devices/rh-virtio.mak | 10 ++ .../s390x-softmmu/s390x-rh-devices.mak | 18 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 109 ++++++++++++++++++ - hw/arm/meson.build | 2 +- + .../x86_64-softmmu/x86_64-rh-devices.mak | 110 ++++++++++++++++++ + hw/arm/virt.c | 2 + hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 3 +- - hw/display/cirrus_vga.c | 7 +- + hw/cxl/meson.build | 3 +- + hw/display/cirrus_vga.c | 4 + hw/ide/piix.c | 5 +- + hw/ide/qdev.c | 9 ++ hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/meson.build | 2 +- - target/arm/cpu_tcg.c | 10 ++ + hw/virtio/meson.build | 5 +- + target/arm/arm-qmp-cmds.c | 2 + + target/arm/cpu.c | 4 + + target/arm/cpu.h | 3 + + target/arm/cpu64.c | 12 +- + target/arm/tcg/cpu32.c | 2 + + target/arm/tcg/cpu64.c | 8 ++ target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 19 files changed, 285 insertions(+), 13 deletions(-) + tests/qtest/arm-cpu-features.c | 4 + + 28 files changed, 323 insertions(+), 15 deletions(-) create mode 100644 
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -87,7 +112,7 @@ Merged patches (7.1.0 rc0): diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..720ec0cb57 +index 0000000000..aec1831199 --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak @@ -0,0 +1,41 @@ @@ -97,12 +122,12 @@ index 0000000000..720ec0cb57 +CONFIG_ARM_GICV3_TCG=y +CONFIG_ARM_GIC=y +CONFIG_ARM_SMMUV3=y -+CONFIG_ARM_V7M=y +CONFIG_ARM_VIRT=y +CONFIG_CXL=y +CONFIG_CXL_MEM_DEVICE=y +CONFIG_EDID=y +CONFIG_PCIE_PORT=y ++CONFIG_PCIE_PCI_BRIDGE=y +CONFIG_PCI_DEVICES=y +CONFIG_PCI_TESTDEV=y +CONFIG_PFLASH_CFI01=y @@ -217,10 +242,10 @@ index 0000000000..69a799adbd +CONFIG_VHOST_USER_FS=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..668b2d0e18 +index 0000000000..ce5be73633 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -0,0 +1,109 @@ +@@ -0,0 +1,110 @@ +include ../rh-virtio.mak + +CONFIG_ACPI=y @@ -259,6 +284,7 @@ index 0000000000..668b2d0e18 +CONFIG_IDE_PCI=y +CONFIG_IDE_PIIX=y +CONFIG_IDE_QDEV=y ++CONFIG_IGB_PCI_EXPRESS=y +CONFIG_IOAPIC=y +CONFIG_IOH3420=y +CONFIG_ISA_BUS=y @@ -268,7 +294,6 @@ index 0000000000..668b2d0e18 +CONFIG_MC146818RTC=y +CONFIG_MEM_DEVICE=y +CONFIG_NVDIMM=y -+CONFIG_OPENGL=y +CONFIG_PAM=y +CONFIG_PC=y +CONFIG_PCI=y @@ -282,6 +307,7 @@ index 0000000000..668b2d0e18 +CONFIG_PCSPK=y +CONFIG_PC_ACPI=y +CONFIG_PC_PCI=y ++CONFIG_PCIE_PCI_BRIDGE=y +CONFIG_PFLASH_CFI01=y +CONFIG_PVPANIC_ISA=y +CONFIG_PXB=y @@ -330,19 +356,26 @@ index 0000000000..668b2d0e18 +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y -diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index 
b545ba0e4f..a41a16cba7 100644 ---- a/hw/arm/meson.build -+++ b/hw/arm/meson.build -@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) - arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) - arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) - --arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) -+#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) - arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) - arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) - arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index be2856c018..af9ea4dd1c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -205,6 +205,7 @@ static const int a15irqmap[] = { + }; + + static const char *valid_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #ifdef CONFIG_TCG + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), +@@ -219,6 +220,7 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("neoverse-n2"), + #endif + ARM_CPU_TYPE_NAME("cortex-a53"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("cortex-a57"), + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), diff --git a/hw/block/fdc.c b/hw/block/fdc.c index d7cc4d3ec1..12d0a60905 100644 --- a/hw/block/fdc.c @@ -372,18 +405,32 @@ index d7cc4d3ec1..12d0a60905 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index e37490074f..4431e3731c 100644 +index 6d319947ca..91962fd863 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,4 +1,5 @@ --softmmu_ss.add(files('core.c', 'cluster.c')) -+#softmmu_ss.add(files('core.c', 'cluster.c')) -+softmmu_ss.add(files('core.c')) +-system_ss.add(files('core.c', 'cluster.c')) ++#system_ss.add(files('core.c', 'cluster.c')) ++system_ss.add(files('core.c')) - softmmu_ss.add(when: 
'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) - softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + system_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) +diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build +index ea0aebf6e3..6878f06974 100644 +--- a/hw/cxl/meson.build ++++ b/hw/cxl/meson.build +@@ -6,7 +6,8 @@ system_ss.add(when: 'CONFIG_CXL', + 'cxl-host.c', + 'cxl-cdat.c', + 'cxl-events.c', +- 'switch-mailbox-cci.c', ++# Disabled for 8.2.0 rebase for RHEL 9.4.0 ++# 'switch-mailbox-cci.c', + ), + if_false: files( + 'cxl-host-stubs.c', diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index b80f98b6c4..cbde6a8f15 100644 +index b80f98b6c4..0370cf8a64 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -36,6 +36,7 @@ @@ -394,31 +441,21 @@ index b80f98b6c4..cbde6a8f15 100644 #include "sysemu/reset.h" #include "qapi/error.h" #include "trace.h" -@@ -47,6 +48,7 @@ - #include "qom/object.h" - #include "ui/console.h" - -+ - /* - * TODO: - * - destination write mask support not complete (bits 5..7) -@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2946,6 +2947,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; -- /* -+ warn_report("'cirrus-vga' is deprecated, " -+ "please use a different VGA card instead"); ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); + -+ /* + /* * Follow real hardware, cirrus card emulated has 4 MB video memory. * Also accept 8 MB/16 MB for backward compatibility. 
- */ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 41d60921e3..a4af45b4e8 100644 +index 4e5e12935f..03ca06bb17 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -190,7 +190,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -428,7 +465,7 @@ index 41d60921e3..a4af45b4e8 100644 } static const TypeInfo piix3_ide_info = { -@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -214,6 +215,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -437,6 +474,52 @@ index 41d60921e3..a4af45b4e8 100644 } static const TypeInfo piix4_ide_info = { +diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c +index 1b3b4da01d..454bfa5783 100644 +--- a/hw/ide/qdev.c ++++ b/hw/ide/qdev.c +@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) + ide_dev_initfn(dev, IDE_CD, errp); + } + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static void ide_cf_realize(IDEDevice *dev, Error **errp) + { + ide_dev_initfn(dev, IDE_CFATA, errp); + } ++#endif + + #define DEFINE_IDE_DEV_PROPERTIES() \ + DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ +@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { + .class_init = ide_cd_class_init, + }; + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static Property ide_cf_properties[] = { + DEFINE_IDE_DEV_PROPERTIES(), + DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), +@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { + .instance_size = sizeof(IDEDrive), + .class_init = ide_cf_class_init, + }; ++#endif + + static void ide_device_class_init(ObjectClass *klass, void *data) + { +@@ -396,7 +402,10 @@ static void 
ide_register_types(void) + type_register_static(&ide_bus_info); + type_register_static(&ide_hd_info); + type_register_static(&ide_cd_info); ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + type_register_static(&ide_cf_info); ++#endif + type_register_static(&ide_device_type_info); + } + diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index b92b63bedc..3b6235dde6 100644 --- a/hw/input/pckbd.c @@ -451,10 +534,10 @@ index b92b63bedc..3b6235dde6 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 23d660619f..b75c9aa799 100644 +index 8ffe1077f1..b3dfeeca4f 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = { +@@ -1746,6 +1746,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -462,7 +545,7 @@ index 23d660619f..b75c9aa799 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = { +@@ -1758,6 +1759,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -471,10 +554,10 @@ index 23d660619f..b75c9aa799 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 8a4861f45a..fcb5dfe792 100644 +index 91fae56573..33e0c8724c 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -379,10 +379,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -386,10 +386,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -488,10 +571,10 @@ index 8a4861f45a..fcb5dfe792 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index 599dc24f0d..905a994c3a 100644 +index e94149ebde..4a8adbf3dc 100644 --- a/hw/usb/meson.build 
+++ b/hw/usb/meson.build -@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade +@@ -52,7 +52,7 @@ system_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader if cacard.found() usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', @@ -500,86 +583,206 @@ index 599dc24f0d..905a994c3a 100644 hw_usb_modules += {'smartcard': usbsmartcard_ss} endif -diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index df0c45e523..c154a4dcf2 100644 ---- a/target/arm/cpu_tcg.c -+++ b/target/arm/cpu_tcg.c -@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) - /* CPU models. These are not needed for the AArch64 linux-user build. */ - #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index c0055a7832..12e1d6c67e 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -17,8 +17,9 @@ if have_vhost + if have_vhost_user + # fixme - this really should be generic + specific_virtio_ss.add(files('vhost-user.c')) +- system_virtio_ss.add(files('vhost-user-device.c')) +- system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) ++# Disabled for 8.2.0 rebase for RHEL 9.4.0 ++# system_virtio_ss.add(files('vhost-user-device.c')) ++# system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) + endif + if have_vhost_vdpa + system_virtio_ss.add(files('vhost-vdpa.c')) +diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c +index b53d5efe13..64989a02d1 100644 +--- a/target/arm/arm-qmp-cmds.c ++++ b/target/arm/arm-qmp-cmds.c +@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo 
*info; + const char *typename; +@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index efb22a87f9..a32521ada9 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2524,6 +2524,10 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) + + acc->info = data; + cc->gdb_core_xml_file = "arm-core.xml"; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void arm_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index a0282e0d28..7e0f0dfea7 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -34,6 +34,8 @@ + #define KVM_HAVE_MCE_INJECTION 1 + #endif + ++#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" ++ + #define EXCP_UDEF 1 /* undefined instruction */ + #define EXCP_SWI 2 /* software interrupt */ + #define EXCP_PREFETCH_ABORT 3 +@@ -1120,6 +1122,7 @@ typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); ++ const char *deprecation_note; + } ARMCPUInfo; + + /** +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 1e9c6c85ae..10be900803 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -648,6 +648,7 @@ static void aarch64_a57_initfn(Object *obj) + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } +#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + static void aarch64_a53_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -704,6 +705,7 @@ static void aarch64_a53_initfn(Object *obj) + cpu->gic_pribits = 5; + 
define_cortex_a72_a57_a53_cp_reginfo(cpu); + } ++#endif + + static void aarch64_host_initfn(Object *obj) { -@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41093000; - define_arm_cp_regs(cpu, cortexa9_cp_reginfo); +@@ -742,8 +744,11 @@ static void aarch64_max_initfn(Object *obj) } + + static const ARMCPUInfo aarch64_cpus[] = { +- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, ++ .deprecation_note = RHEL_CPU_DEPRECATION }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, +#endif /* disabled for RHEL */ + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +@@ -815,8 +820,13 @@ static void aarch64_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); - #ifndef CONFIG_USER_ONLY - static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - }; + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void aarch64_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c +index d9e0e2a4dd..c5c639a6ea 100644 +--- a/target/arm/tcg/cpu32.c ++++ b/target/arm/tcg/cpu32.c +@@ -98,6 +98,7 @@ void aa32_max_features(ARMCPU *cpu) + /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_a7_initfn(Object *obj) + #if !defined(CONFIG_USER_ONLY) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { - ARMCPU *cpu = ARM_CPU(obj); -@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41072000; - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ - } +@@ -1189,3 +1190,4 @@ static void arm_tcg_cpu_register_types(void) + type_init(arm_tcg_cpu_register_types) + + #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ +diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c +index fcda99e158..bd5a993ff8 100644 +--- a/target/arm/tcg/cpu64.c ++++ b/target/arm/tcg/cpu64.c +@@ -29,6 +29,7 @@ + #include "cpu-features.h" + #include "cpregs.h" - static void cortex_a15_initfn(Object *obj) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize, + unsigned cachesize) { -@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj) - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); +@@ -134,6 +135,7 @@ static void aarch64_a35_initfn(Object *obj) + /* These values are the same with A53/A57/A72. 
*/ + define_cortex_a72_a57_a53_cp_reginfo(cpu); } ++#endif + + static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +@@ -223,6 +225,7 @@ static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name, + static Property arm_cpu_lpa2_property = + DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true); +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_m0_initfn(Object *obj) + static void aarch64_a55_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) - - cc->gdb_core_xml_file = "arm-m-profile.xml"; +@@ -1065,6 +1068,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); } -+#endif /* disabled for RHEL */ ++#endif - #ifndef TARGET_AARCH64 /* -@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj) - #endif /* !TARGET_AARCH64 */ + * -cpu max: a CPU with as many features enabled as our emulation supports. 
+@@ -1259,6 +1263,7 @@ void aarch64_max_tcg_initfn(Object *obj) + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); + } - static const ARMCPUInfo arm_tcg_cpus[] = { +#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "cortex-a7", .initfn = cortex_a7_initfn }, - { .name = "cortex-a8", .initfn = cortex_a8_initfn }, - { .name = "cortex-a9", .initfn = cortex_a9_initfn }, -+#endif /* disabled for RHEL */ - { .name = "cortex-a15", .initfn = cortex_a15_initfn }, + static const ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, + { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, +@@ -1270,14 +1275,17 @@ static const ARMCPUInfo aarch64_cpus[] = { + { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, + { .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn }, + }; ++#endif + + static void aarch64_cpu_register_types(void) + { +#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, -+#endif /* disabled for RHEL */ - #ifndef TARGET_AARCH64 - { .name = "max", .initfn = arm_max_initfn }, - #endif + size_t i; + + for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { + aarch64_cpu_register(&aarch64_cpus[i]); + } ++#endif + } + + type_init(aarch64_cpu_register_types) diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 912b037c63..cd3ff700ac 100644 +index 7dbb47de64..69fddb05bc 100644 --- a/target/ppc/cpu-models.c 
+++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -609,7 +812,7 @@ index 912b037c63..cd3ff700ac 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -896,12 +900,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -898,12 +902,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -625,7 +828,7 @@ index 912b037c63..cd3ff700ac 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -911,12 +918,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -913,12 +920,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power10", "power10_v2.0" }, #endif @@ -655,10 +858,10 @@ index 63981bf36b..87a4480c05 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 3ac7ec9acf..97da1a6424 100644 +index 33ab3551f4..912e493951 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2529,6 +2529,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2567,6 +2567,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -673,6 +876,37 @@ index 3ac7ec9acf..97da1a6424 100644 prop.cpuid = s390_cpuid_from_cpu_model(model); prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ +diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c +index a8a4c668ad..2458cc527c 100644 +--- a/tests/qtest/arm-cpu-features.c ++++ b/tests/qtest/arm-cpu-features.c +@@ -451,8 +451,10 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + 
assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); ++#endif /* disabled for RHEL */ + + /* Enabling and disabling pmu should always work. */ + assert_has_feature_enabled(qts, "max", "pmu"); +@@ -469,6 +471,7 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a57", "pmu"); + assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "a64fx", "pmu"); + assert_has_feature_enabled(qts, "a64fx", "aarch64"); + /* +@@ -481,6 +484,7 @@ static void test_query_cpu_model_expansion(const void *data) + "{ 'sve384': true }"); + assert_error(qts, "a64fx", "cannot enable sve640", + "{ 'sve640': true }"); ++#endif /* disabled for RHEL */ + + sve_tests_default(qts, "max"); + pauth_tests_default(qts, "max"); -- -2.39.1 +2.39.3 diff --git a/SOURCES/0006-Machine-type-related-general-changes.patch b/SOURCES/0006-Machine-type-related-general-changes.patch index 5dd591f..4a4c6fb 100644 --- a/SOURCES/0006-Machine-type-related-general-changes.patch +++ b/SOURCES/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001 +From d9ff466c980d219ebf230ea24becce294c196f1f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -19,10 +19,13 @@ Rebase notes (7.0.0): - Remove downstream changes leftovers in hw/rtc/mc146818rtc.c - Remove unnecessary change in hw/usb/hcd-uhci.c -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - Moved adding rhel_old_machine_deprecation variable from s390x to general machine types commit - Moved adding hw_compat_rhel_8_6 struct from x86_64 to general machine types commit +Rebase notes (8.1.0): +- Do not modify unused vga-isa.c + Merged patches (6.1.0): - f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 - 
1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 @@ -40,39 +43,45 @@ Merged patches (7.0.0): - ef5afcc86d Fix virtio-net-pci* "vectors" compat - 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/piix4.c chunk) - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/core/machine.c and include/hw/boards.h chunk) -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 21ed34787b Addd 7.2 compat bits for RHEL 9.1 machine type - e5c8d5d603 virtio-rng-pci: fix migration compat for vectors - 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (general part) +- 1165e24c6b hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0 + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++ - hw/display/vga-isa.c | 2 +- + hw/core/machine.c | 267 +++++++++++++++++++++++++++++++++++ hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 46 ++++++- + hw/smbios/smbios.c | 46 +++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 59 ++++++--- + hw/usb/hcd-xhci-pci.c | 59 ++++++-- hw/usb/hcd-xhci-pci.h | 1 + - include/hw/boards.h | 31 +++++ + include/hw/boards.h | 40 ++++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 14 files changed, 367 insertions(+), 23 deletions(-) + 13 files changed, 413 insertions(+), 22 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 63d2113b86..a24b9aac92 100644 +index dd523d2e4c..5050c0ba97 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) +@@ -245,7 +245,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -82,10 +91,10 @@ index 63d2113b86..a24b9aac92 100644 .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ac626b3bef..4a6e89c7bc 100644 +index af9ea4dd1c..62f0f7d4d6 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1638,7 +1638,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, @@ -95,10 +104,10 @@ index ac626b3bef..4a6e89c7bc 100644 /* build the array of physical mem area from base_memmap */ mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index cd13b8b0a3..5aa567fad3 100644 +index 0c17398141..446601ee30 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = { +@@ -57,6 +57,273 @@ GlobalProperty hw_compat_7_2[] = { }; const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); @@ -108,6 +117,44 @@ index cd13b8b0a3..5aa567fad3 100644 +const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_4[] = { ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "host_uso", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "guest_uso4", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "guest_uso6", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "ramfb", "x-migrate", "off" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "igb", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); ++ ++GlobalProperty hw_compat_rhel_9_3[] = { ++ /* hw_compat_rhel_9_3 from hw_compat_8_0 */ ++ { "migration", "multifd-flush-after-each-section", "on"}, ++ /* hw_compat_rhel_9_3 from hw_compat_8_0 */ ++ { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" }, ++}; ++const size_t hw_compat_rhel_9_3_len = G_N_ELEMENTS(hw_compat_rhel_9_3); ++ ++GlobalProperty hw_compat_rhel_9_2[] = { ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "e1000e", "migrate-timadj", "off" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ 
{ "virtio-mem", "x-early-migration", "false" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "migration", "x-preempt-pre-7-2", "true" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, ++}; ++const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); ++ +/* + * Mostly the same as hw_compat_7_0 + */ @@ -334,25 +381,12 @@ index cd13b8b0a3..5aa567fad3 100644 GlobalProperty hw_compat_7_1[] = { { "virtio-device", "queue_reset", "false" }, { "virtio-rng-pci", "vectors", "0" }, -diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 2a5437d803..0db2c2b2a1 100644 ---- a/hw/display/vga-isa.c -+++ b/hw/display/vga-isa.c -@@ -89,7 +89,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) - } - - static Property vga_isa_properties[] = { -- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), -+ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), - DEFINE_PROP_END_OF_LIST(), - }; - diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 30eedd62a3..14a794081e 100644 +index eace854335..2a9f465619 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine, - smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +@@ -238,6 +238,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("QEMU", mc->desc, mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, @@ -361,11 +395,11 @@ index 30eedd62a3..14a794081e 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 797ba347fd..dc0ba5f9e7 100644 +index 4f3e5412f6..912cb0c0dc 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) - smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +@@ -206,6 +206,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", mc->desc, mc->name, 
pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, @@ -374,10 +408,10 @@ index 797ba347fd..dc0ba5f9e7 100644 } diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 5a5aaf868d..3d473d5869 100644 +index 4af8c66266..7dc12907ab 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3169,7 +3169,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -386,7 +420,7 @@ index 5a5aaf868d..3d473d5869 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3259,7 +3259,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3250,7 +3250,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -397,7 +431,7 @@ index 5a5aaf868d..3d473d5869 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index d2007e70fb..319eae9e9d 100644 +index 2a90601ac5..7bde23e59d 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -58,6 +58,9 @@ static bool smbios_legacy = true; @@ -419,7 +453,7 @@ index d2007e70fb..319eae9e9d 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -985,7 +988,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -431,7 +465,7 @@ index d2007e70fb..319eae9e9d 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -1006,11 +1012,45 @@ void 
smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -479,10 +513,10 @@ index d2007e70fb..319eae9e9d 100644 SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b497..32935da46c 100644 +index b25da448c8..0331e84398 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c -@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { +@@ -229,7 +229,7 @@ static const VMStateDescription vmstate_pit_common = { .pre_save = pit_dispatch_pre_save, .post_load = pit_dispatch_post_load, .fields = (VMStateField[]) { @@ -603,13 +637,22 @@ index 08f70ce97c..1be7527c1b 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index 6fbbfd56c8..c5a965d27f 100644 +index da85f86efb..4a21eddbf9 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -459,4 +459,35 @@ extern const size_t hw_compat_2_2_len; +@@ -503,4 +503,44 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_9_4[]; ++extern const size_t hw_compat_rhel_9_4_len; ++ ++extern GlobalProperty hw_compat_rhel_9_3[]; ++extern const size_t hw_compat_rhel_9_3_len; ++ ++extern GlobalProperty hw_compat_rhel_9_2[]; ++extern const size_t hw_compat_rhel_9_2_len; ++ +extern GlobalProperty hw_compat_rhel_9_1[]; +extern const size_t hw_compat_rhel_9_1_len; + @@ -659,13 +702,13 @@ index 7f3259a630..d24b3ccd32 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 8206d5405a..908a275736 100644 +index a10ceeabbf..037942d233 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -111,6 +111,9 @@ struct PCMachineClass { - bool smbios_defaults; +@@ -113,6 +113,9 @@ struct PCMachineClass { bool smbios_legacy_mode; bool 
smbios_uuid_encoded; + SmbiosEntryPointType default_smbios_ep_type; + /* New fields needed for Windows HardwareID-6 matching */ + const char *smbios_stream_product; + const char *smbios_stream_version; @@ -673,5 +716,5 @@ index 8206d5405a..908a275736 100644 /* RAM / address space compat: */ bool gigabyte_align; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0007-Add-aarch64-machine-types.patch b/SOURCES/0007-Add-aarch64-machine-types.patch index f47bbd0..fde7982 100644 --- a/SOURCES/0007-Add-aarch64-machine-types.patch +++ b/SOURCES/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001 +From 23f614ab0b79ec1c6f65a7f0d6993bfdfc53fd23 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -17,18 +17,19 @@ Rebase notes (7.0.0): - Added dtb-kaslr-seed option - Set no_tcg_lpa2 to true -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - replace dtb_kaslr_seed by dtb_randomness - -Rebase notes (7.1.0 rc3): - Updated dtb_randomness comment -Rebase notes (7.2.0 rc0): +Rebase notes (7.2.0): - Disabled cortex-a35 -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c +Rebase notes (8.1.0): +- Added setting default_nic + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -46,33 +47,33 @@ Merged patches (7.0.0): - f79b31bdef hw/arm/virt: Remove the dtb-kaslr-seed machine option - b6fca85f4a hw/arm/virt: Fix missing initialization in instance/class_init() -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - ac97dd4f9f RHEL-only: AArch64: Drop unsupported CPU types - e9c0a70664 target/arm: deprecate named CPU models -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged 
patches (8.0.0): - c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type - d97cd7c513 redhat: fix virt-rhel9.2.0 compat props + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (arm part) +- c07f666086 hw/arm/virt: Validate cluster and NUMA node boundary for RHEL machines + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. (partial) --- - hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 8 ++ - target/arm/arm-qmp-cmds.c | 2 + - target/arm/cpu-qom.h | 1 + - target/arm/cpu.c | 5 + - target/arm/cpu.h | 2 + - target/arm/cpu64.c | 16 ++- - target/arm/cpu_tcg.c | 12 +- - tests/qtest/arm-cpu-features.c | 6 + - 9 files changed, 289 insertions(+), 14 deletions(-) + hw/arm/virt.c | 250 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 ++ + 2 files changed, 257 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 4a6e89c7bc..1ae1654be5 100644 +index 62f0f7d4d6..c541efee5e 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -81,6 +81,7 @@ +@@ -82,6 +82,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -80,13 +81,12 @@ index 4a6e89c7bc..1ae1654be5 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -107,7 +108,48 @@ +@@ -108,7 +109,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) -- +#endif /* disabled for RHEL */ -+ + +#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ + static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ + void *data) \ @@ -130,28 +130,7 @@ index 4a6e89c7bc..1ae1654be5 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -204,16 +246,20 @@ static const int a15irqmap[] = { - }; - - static const char *valid_cpus[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - 
ARM_CPU_TYPE_NAME("cortex-a7"), - ARM_CPU_TYPE_NAME("cortex-a15"), - ARM_CPU_TYPE_NAME("cortex-a35"), - ARM_CPU_TYPE_NAME("cortex-a53"), - ARM_CPU_TYPE_NAME("cortex-a55"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("cortex-a57"), -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - ARM_CPU_TYPE_NAME("cortex-a72"), - ARM_CPU_TYPE_NAME("cortex-a76"), - ARM_CPU_TYPE_NAME("a64fx"), - ARM_CPU_TYPE_NAME("neoverse-n1"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("host"), - ARM_CPU_TYPE_NAME("max"), - }; -@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine) +@@ -2341,6 +2383,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -159,7 +138,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2368,6 +2411,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -167,16 +146,15 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) - +@@ -2383,6 +2427,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } -- + +#if 0 /* Disabled for Red Hat Enterprise Linux */ static bool virt_get_compact_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) +@@ -2438,7 +2483,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) vms->highmem_mmio = value; } @@ -185,7 +163,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_its(Object *obj, Error **errp) { -@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) +@@ -2454,6 
+2499,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -193,7 +171,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2467,6 +2513,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -201,7 +179,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2550,6 +2597,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -209,7 +187,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2563,6 +2611,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -217,7 +195,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2988,6 +3040,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2935,6 +2984,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 
0 : requested_pa_size; } @@ -225,7 +203,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3405,3 +3455,201 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -263,7 +241,10 @@ index 4a6e89c7bc..1ae1654be5 100644 + mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; ++ mc->default_nic = "virtio-net-pci"; + + object_class_property_add(oc, "acpi", "OnOffAuto", + virt_get_acpi, virt_set_acpi, @@ -404,6 +385,9 @@ index 4a6e89c7bc..1ae1654be5 100644 +static void rhel920_virt_options(MachineClass *mc) +{ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); +} +DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) + @@ -422,10 +406,10 @@ index 4a6e89c7bc..1ae1654be5 100644 +} +DEFINE_RHEL_MACHINE(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index e1ddbea96b..81c2363a40 100644 +index f69239850e..7b8abe5645 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -187,9 +187,17 @@ struct VirtMachineState { +@@ -177,9 +177,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -443,270 +427,6 @@ index e1ddbea96b..81c2363a40 100644 void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); -diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c -index c8fa524002..3aa089abf3 100644 ---- a/target/arm/arm-qmp-cmds.c -+++ b/target/arm/arm-qmp-cmds.c -@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, - static void arm_cpu_add_definition(gpointer data, gpointer user_data) - { - ObjectClass *oc = data; -+ CPUClass *cc = CPU_CLASS(oc); - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; -@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); -+ info->deprecated = !!cc->deprecation_note; - - QAPI_LIST_PREPEND(*cpu_list, info); - } -diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h -index 514c22ced9..f789173451 100644 ---- a/target/arm/cpu-qom.h -+++ b/target/arm/cpu-qom.h -@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { - const char *name; - void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); -+ const char *deprecation_note; - } ARMCPUInfo; - - void arm_cpu_register(const ARMCPUInfo *info); -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5182ed0c91..6740a8b940 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void arm_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 
c097cae988..829d4a2328 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -34,6 +34,8 @@ - #define KVM_HAVE_MCE_INJECTION 1 - #endif - -+#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" -+ - #define EXCP_UDEF 1 /* undefined instruction */ - #define EXCP_SWI 2 /* software interrupt */ - #define EXCP_PREFETCH_ABORT 3 -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 0fb07cc7b6..47459627fb 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -31,6 +31,7 @@ - #include "hw/qdev-properties.h" - #include "internals.h" - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a35_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj) - /* These values are the same with A53/A57/A72. */ - define_cortex_a72_a57_a53_cp_reginfo(cpu); - } -+#endif /* disabled for RHEL */ - - void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) - { -@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj) - define_cortex_a72_a57_a53_cp_reginfo(cpu); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a53_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) - - define_neoverse_n1_cp_reginfo(cpu); - } -+#endif /* disabled for RHEL */ - - static void aarch64_host_initfn(Object *obj) - { -@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj) - } - - static const ARMCPUInfo aarch64_cpus[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, -- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, -+#endif /* disabled for RHEL */ -+ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, -+ .deprecation_note = RHEL_CPU_DEPRECATION }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, - { .name = 
"cortex-a72", .initfn = aarch64_a72_initfn }, - { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, - { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, - { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, -+#endif /* disabled for RHEL */ - { .name = "max", .initfn = aarch64_max_initfn }, - #if defined(CONFIG_KVM) || defined(CONFIG_HVF) - { .name = "host", .initfn = aarch64_host_initfn }, -@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void aarch64_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index c154a4dcf2..f29425b656 100644 ---- a/target/arm/cpu_tcg.c -+++ b/target/arm/cpu_tcg.c -@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) - } - #endif /* !CONFIG_USER_ONLY */ - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ - #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) - { -@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41093000; - define_arm_cp_regs(cpu, cortexa9_cp_reginfo); - } --#endif /* disabled for RHEL */ - - #ifndef CONFIG_USER_ONLY - static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - }; - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_a7_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41072000; - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ - } --#endif /* disabled for RHEL */ - - static void cortex_a15_initfn(Object *obj) - { -@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj) - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_m0_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) - - cc->gdb_core_xml_file = "arm-m-profile.xml"; - } --#endif /* disabled for RHEL */ - - #ifndef TARGET_AARCH64 - /* -@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj) - #endif /* !TARGET_AARCH64 */ - - static const ARMCPUInfo arm_tcg_cpus[] = { --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "cortex-a7", .initfn = cortex_a7_initfn }, - { 
.name = "cortex-a8", .initfn = cortex_a8_initfn }, - { .name = "cortex-a9", .initfn = cortex_a9_initfn }, --#endif /* disabled for RHEL */ - { .name = "cortex-a15", .initfn = cortex_a15_initfn }, --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, --#endif /* disabled for RHEL */ - #ifndef TARGET_AARCH64 - { .name = "max", .initfn = arm_max_initfn }, - #endif -@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void) - type_init(arm_tcg_cpu_register_types) - - #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ -+#endif /* disabled for RHEL */ -diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index 1cb08138ad..834497dfec 100644 ---- a/tests/qtest/arm-cpu-features.c -+++ b/tests/qtest/arm-cpu-features.c -@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data) - assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); - - /* Test expected feature presence/absence for some cpu types */ -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); -+#endif /* disabled for RHEL */ - - /* Enabling and disabling pmu should always work. 
*/ - assert_has_feature_enabled(qts, "max", "pmu"); -@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data) - assert_has_feature_enabled(qts, "cortex-a57", "pmu"); - assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "a64fx", "pmu"); - assert_has_feature_enabled(qts, "a64fx", "aarch64"); - /* -@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data) - "{ 'sve384': true }"); - assert_error(qts, "a64fx", "cannot enable sve640", - "{ 'sve640': true }"); -+#endif /* disabled for RHEL */ - - sve_tests_default(qts, "max"); - pauth_tests_default(qts, "max"); -@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - QDict *resp; - char *error; - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_error(qts, "cortex-a15", - "We cannot guarantee the CPU type 'cortex-a15' works " - "with KVM on this host", NULL); -+#endif /* disabled for RHEL */ - - assert_has_feature_enabled(qts, "host", "aarch64"); - -- -2.39.1 +2.39.3 diff --git a/SOURCES/0008-Add-ppc64-machine-types.patch b/SOURCES/0008-Add-ppc64-machine-types.patch index ab78cae..a269adb 100644 --- a/SOURCES/0008-Add-ppc64-machine-types.patch +++ b/SOURCES/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001 +From d03cff85f5f1b69b1a66011ebaa974ece81d31bc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -20,7 +20,7 @@ Merged patches (6.1.0): - af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - baa6790171 target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL --- hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ @@ -34,10 +34,10 @@ Merged patches (7.1.0 
rc0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 4921198b9d..e24b3e22e3 100644 +index df09aa9d6a..ff459e1a46 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) +@@ -1689,6 +1689,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -47,7 +47,7 @@ index 4921198b9d..e24b3e22e3 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3397,6 +3400,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj) +@@ -3475,6 +3492,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index 4921198b9d..e24b3e22e3 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4734,6 +4757,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index 4921198b9d..e24b3e22e3 100644 } static const TypeInfo spapr_machine_info = { -@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4785,6 +4809,7 @@ static void 
spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-8.0 + * pseries-8.2 */ -@@ -4894,6 +4919,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4967,6 +4992,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,8 +105,8 @@ index 4921198b9d..e24b3e22e3 100644 /* * pseries-4.0 -@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, - *nv2atsd = 0; +@@ -4982,6 +5008,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + } return true; } + @@ -114,7 +114,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5306,6 +5334,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -337,7 +337,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index fcb5dfe792..ab8fb5bf62 100644 +index 33e0c8724c..9d01663f43 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -25,6 +25,7 @@ @@ -348,7 +348,7 @@ index fcb5dfe792..ab8fb5bf62 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -259,6 +260,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -261,6 +262,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -356,7 +356,7 @@ index fcb5dfe792..ab8fb5bf62 100644 if (!qdev_realize(DEVICE(cpu), NULL, errp)) { return false; -@@ -270,6 +272,17 @@ static bool 
spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -277,6 +279,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, /* Set time-base frequency to 512 MHz. vhyp must be set first. */ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); @@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 5c8aabd444..04489d5808 100644 +index e91791a1a9..1951d8a2a0 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -155,6 +155,7 @@ struct SpaprMachineClass { +@@ -154,6 +154,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; @@ -386,7 +386,7 @@ index 5c8aabd444..04489d5808 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -257,6 +258,9 @@ struct SpaprMachineState { +@@ -256,6 +257,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -397,7 +397,7 @@ index 5c8aabd444..04489d5808 100644 char *kvm_type; char *host_model; diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7949a24f5a..f207a9ba01 100644 +index ebef2cccec..ff2c00c60e 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -422,10 +422,10 @@ index 7949a24f5a..f207a9ba01 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index cd3ff700ac..1cb49c8087 100644 +index 69fddb05bc..64a05aaef3 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c -@@ -746,6 +746,7 @@ +@@ -748,6 +748,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h 
b/target/ppc/cpu.h -index 557d736dab..6646ec1c27 100644 +index f8101ffa29..e799a2bee6 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1482,6 +1482,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1635,6 +1635,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,18 +446,18 @@ index 557d736dab..6646ec1c27 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 78f6fc50cd..68d06c3f8f 100644 +index 9b1abe2fc4..56f1c46e8e 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv; - static int cap_large_decr; +@@ -89,6 +89,7 @@ static int cap_large_decr; static int cap_fwnmi; static int cap_rpt_invalidate; + static int cap_ail_mode_3; +static int cap_ppc_secure_guest; static uint32_t debug_inst_opcode; -@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -141,6 +142,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -465,8 +465,8 @@ index 78f6fc50cd..68d06c3f8f 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void) - return cap_rpt_invalidate; +@@ -2579,6 +2581,16 @@ bool kvmppc_supports_ail_3(void) + return cap_ail_mode_3; } +bool kvmppc_has_cap_secure_guest(void) @@ -482,7 +482,7 @@ index 78f6fc50cd..68d06c3f8f 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2979,3 +2991,18 @@ bool kvm_arch_cpu_check_are_resettable(void) void kvm_arch_accel_class_init(ObjectClass 
*oc) { } @@ -502,27 +502,27 @@ index 78f6fc50cd..68d06c3f8f 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index 5fd9753953..b5ebfe2be0 100644 +index 1975fb5ee6..d1017f98be 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h -@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); +@@ -46,6 +46,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); +void kvmppc_svm_allow(Error **errp); - #ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void); - int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, +@@ -79,6 +80,8 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); int kvmppc_has_cap_rpt_invalidate(void); + bool kvmppc_supports_ail_3(void); int kvmppc_enable_hwrng(void); +bool kvmppc_has_cap_secure_guest(void); +int kvmppc_enable_cap_secure_guest(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) +@@ -427,6 +430,16 @@ static inline bool kvmppc_supports_ail_3(void) return false; } @@ -540,5 +540,5 @@ index 5fd9753953..b5ebfe2be0 100644 { return -1; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0009-Add-s390x-machine-types.patch b/SOURCES/0009-Add-s390x-machine-types.patch index 07dfb57..c3b9936 100644 --- a/SOURCES/0009-Add-s390x-machine-types.patch +++ b/SOURCES/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001 +From 3623043d4a923bf9f541d439c76e7874cf0fa81d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -8,7 
+8,7 @@ Adding changes to add RHEL machine types for s390x architecture. Signed-off-by: Miroslav Rezanina -- -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - Moved adding rhel_old_machine_deprecation variable to general machine types commit Merged patches (6.1.0): @@ -23,52 +23,74 @@ Merged patches (7.0.0): - 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x - dcc64971bf RHEL: mark old machine types as deprecated (partialy) -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/s390x/s390-virtio-ccw.c chunk) - c8ad21ca31 redhat: Update s390x machine type compatibility for rebase to QEMU 7.0.0 - 5bcf8d874c target/s390x: deprecate CPUs older than z14 -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update - a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type - ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (s390x part) + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) --- - hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 159 +++++++++++++++++++++++++++++++ target/s390x/cpu_models.c | 11 +++ target/s390x/cpu_models.h | 2 + target/s390x/cpu_models_sysemu.c | 2 + - 4 files changed, 158 insertions(+) + 4 files changed, 174 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 503f212a31..dcd3b966b0 100644 +index 7262725d2e..984891b82a 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -826,6 +826,7 @@ bool css_migration_enabled(void) +@@ -855,6 +855,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_8_0_instance_options(MachineState *machine) + static void ccw_machine_8_2_instance_options(MachineState *machine) { } -@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1256,6 +1257,164 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + + ++static void ccw_machine_rhel940_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel940_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true); ++ +static void ccw_machine_rhel920_instance_options(MachineState *machine) +{ ++ ccw_machine_rhel940_instance_options(machine); +} + +static void ccw_machine_rhel920_class_options(MachineClass *mc) +{ ++ ccw_machine_rhel940_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); ++ mc->smp_props.drawers_supported = false; /* from ccw_machine_8_1 */ ++ mc->smp_props.books_supported = false; /* from 
ccw_machine_8_1 */ +} -+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); ++DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", false); + +static void ccw_machine_rhel900_instance_options(MachineState *machine) +{ @@ -204,7 +226,7 @@ index 503f212a31..dcd3b966b0 100644 static void ccw_machine_register_types(void) { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 457b5cb10c..ff6b9463cb 100644 +index a63d990e4e..198b81f2c0 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -46,6 +46,9 @@ @@ -217,7 +239,7 @@ index 457b5cb10c..ff6b9463cb 100644 static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -856,22 +859,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -249,16 +271,16 @@ index 457b5cb10c..ff6b9463cb 100644 static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h -index fb1adc8b21..d76745afa9 100644 +index d7b8912989..1a806a97c4 100644 --- a/target/s390x/cpu_models.h +++ b/target/s390x/cpu_models.h -@@ -38,6 +38,8 @@ struct S390CPUDef { +@@ -38,6 +38,8 @@ typedef struct S390CPUDef { S390FeatBitmap full_feat; /* used to init full_feat from generated data */ S390FeatInit full_init; + /* if deprecated, provides a suggestion */ + const char *deprecation_note; - }; + } S390CPUDef; /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c @@ -282,5 +304,5 @@ index 87a4480c05..28c1b0486c 100644 if (cpu_list_data->model) { Object *obj; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0010-Add-x86_64-machine-types.patch 
b/SOURCES/0010-Add-x86_64-machine-types.patch index 9685338..d24bb57 100644 --- a/SOURCES/0010-Add-x86_64-machine-types.patch +++ b/SOURCES/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001 +From b432505cb28bc3b9b0c1849210ac6c63bca3fe37 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -13,9 +13,12 @@ Rebase notes (6.1.0): Rebase notes (7.0.0): - Reset alias for all machine-types except latest one -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - remove legacy_no_rng_seed usage (removed upstream) +Rebase notes (8.1.0): +- default_nic_model to default_nic + Merged patches (6.1.0): - 59c284ad3b x86: Add x86 rhel8.5 machine types - a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default @@ -35,35 +38,44 @@ Merged patches (7.0.0): - dcc64971bf RHEL: mark old machine types as deprecated (partialy) - 6b396f182b RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/i386/pc.c chunk) - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (x86_64 specific changes) - 35b5c8554f target/i386: deprecate CPUs older than x86_64-v2 ABI -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - f33ca8aed4 x86: rhel 9.2.0 machine type + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (x86_64 part) +- c6eaf73add redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU 8.0.0 update +- 6cbf496e5e hw/acpi: Mark acpi blobs as resizable on RHEL pc machines version 7.6 and above + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) +- 719e2ac147 Fix x86 machine type compatibility for qemu-kvm 8.1.0 --- - hw/i386/pc.c | 147 +++++++++++++++++++++- - hw/i386/pc_piix.c | 86 ++++++++++++- - hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc.c | 159 ++++++++++++++++++++- + hw/i386/pc_piix.c | 112 ++++++++++++++- + hw/i386/pc_q35.c | 285 ++++++++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 27 ++++ - target/i386/cpu.c | 21 ++++ + include/hw/i386/pc.h | 33 +++++ + target/i386/cpu.c | 21 +++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 9 files changed, 538 insertions(+), 7 deletions(-) + 9 files changed, 615 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 1489abf010..8abb1f872e 100644 +index 29b9964733..a1faa9e92c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = { +@@ -323,6 +323,161 @@ GlobalProperty pc_compat_2_0[] = { }; - const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0); +/* This macro is for changes to properties that are RHEL specific, + * different to the current upstream and to be applied to the latest @@ -80,13 +92,25 @@ index 1489abf010..8abb1f872e 100644 + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, + { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, -+ /* bz 1508330 */ ++ /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, + /* bz 1941397 */ + { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_3_compat[] = { ++ /* pc_rhel_9_3_compat from pc_compat_8_0 */ ++ { "virtio-mem", "unplugged-inaccessible", "auto" }, ++}; ++const size_t pc_rhel_9_3_compat_len = G_N_ELEMENTS(pc_rhel_9_3_compat); ++ ++GlobalProperty pc_rhel_9_2_compat[] = { ++ /* 
pc_rhel_9_2_compat from pc_compat_7_2 */ ++ { "ICH9-LPC", "noreboot", "true" }, ++}; ++const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); ++ +GlobalProperty pc_rhel_9_0_compat[] = { + /* pc_rhel_9_0_compat from pc_compat_6_2 */ + { "virtio-mem", "unplugged-inaccessible", "off" }, @@ -177,27 +201,27 @@ index 1489abf010..8abb1f872e 100644 + * machine types irrespective of host. + */ +GlobalProperty pc_rhel_7_6_compat[] = { -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ 
++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, +}; +const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); @@ -211,15 +235,15 @@ index 1489abf010..8abb1f872e 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->pvh_enabled = true; +@@ -1826,6 +1981,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->kvmclock_create_always = true; + pcmc->resizable_acpi_blob = true; assert(!mc->get_hotplug_handler); + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1954,7 +2098,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1836,7 +1992,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -230,10 +254,10 @@ index 1489abf010..8abb1f872e 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 14a794081e..3e330fd36f 100644 +index 2a9f465619..44038391fb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -54,6 +54,7 @@ +@@ -53,6 +53,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -241,18 +265,18 @@ index 14a794081e..3e330fd36f 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine, +@@ -235,8 +236,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- smbios_set_defaults("QEMU", mc->desc, - mc->name, 
pcmc->smbios_legacy_mode, + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine, +@@ -453,6 +454,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp) * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ @@ -260,7 +284,7 @@ index 14a794081e..3e330fd36f 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -970,3 +972,109 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -274,8 +298,9 @@ index 14a794081e..3e330fd36f 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; -+ pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; ++ pcmc->resizable_acpi_blob = true; ++ m->default_nic = "e1000"; + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; @@ -296,6 +321,7 @@ index 14a794081e..3e330fd36f 100644 +static void pc_machine_rhel760_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ ObjectClass *oc = OBJECT_CLASS(m); + pc_machine_rhel7_options(m); + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; + m->async_pf_vmexit_disable = true; @@ -309,7 +335,31 @@ index 14a794081e..3e330fd36f 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ /* From pc_i440fx_7_0_machine_options() */ + pcmc->enforce_amd_1tb_hole = false; ++ /* From pc_i440fx_8_0_machine_options() */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* Introduced in QEMU 8.2 */ ++ pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; ++ ++ 
object_class_property_add_enum(oc, "x-south-bridge", "PCSouthBridgeOption", ++ &PCSouthBridgeOption_lookup, ++ pc_get_south_bridge, ++ pc_set_south_bridge); ++ object_class_property_set_description(oc, "x-south-bridge", ++ "Use a different south bridge than PIIX3"); ++ ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_4, ++ hw_compat_rhel_9_4_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_3, ++ hw_compat_rhel_9_3_len); ++ compat_props_add(m->compat_props, pc_rhel_9_3_compat, ++ pc_rhel_9_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -345,21 +395,21 @@ index 14a794081e..3e330fd36f 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index dc0ba5f9e7..98601bb76f 100644 +index 912cb0c0dc..6387df97c8 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) +@@ -203,8 +203,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- smbios_set_defaults("QEMU", mc->desc, - mc->name, pcmc->smbios_legacy_mode, + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine) +@@ -363,6 +363,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -367,7 +417,7 @@ index dc0ba5f9e7..98601bb76f 100644 static void pc_q35_machine_options(MachineClass *m) { 
PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -699,3 +700,283 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -379,8 +429,8 @@ index dc0ba5f9e7..98601bb76f 100644 +static void pc_q35_machine_rhel_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pcmc->default_nic_model = "e1000e"; + pcmc->pci_root_uid = 0; ++ m->default_nic = "e1000e"; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; @@ -396,6 +446,24 @@ index dc0ba5f9e7..98601bb76f 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel940(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel940_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.4.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940, ++ pc_q35_machine_rhel940_options); ++ ++ +static void pc_q35_init_rhel920(MachineState *machine) +{ + pc_q35_init(machine); @@ -404,10 +472,25 @@ index dc0ba5f9e7..98601bb76f 100644 +static void pc_q35_machine_rhel920_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel940_options(m); + m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.2.0"; ++ ++ /* From pc_q35_8_0_machine_options() */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_4, ++ hw_compat_rhel_9_4_len); ++ compat_props_add(m->compat_props, 
hw_compat_rhel_9_3, ++ hw_compat_rhel_9_3_len); ++ compat_props_add(m->compat_props, pc_rhel_9_3_compat, ++ pc_rhel_9_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, @@ -619,10 +702,10 @@ index dc0ba5f9e7..98601bb76f 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index c5a965d27f..5e7446ee40 100644 +index 4a21eddbf9..4edfdb0ddb 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -268,6 +268,8 @@ struct MachineClass { +@@ -277,6 +277,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -632,16 +715,22 @@ index c5a965d27f..5e7446ee40 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 908a275736..4376f64a47 100644 +index 037942d233..37644ede7e 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len; +@@ -314,6 +314,39 @@ extern const size_t pc_compat_1_4_len; int pc_machine_kvm_type(MachineState *machine, const char *vm_type); +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_3_compat[]; ++extern const size_t pc_rhel_9_3_compat_len; ++ ++extern GlobalProperty pc_rhel_9_2_compat[]; ++extern const size_t pc_rhel_9_2_compat_len; ++ +extern GlobalProperty pc_rhel_9_0_compat[]; +extern const size_t pc_rhel_9_0_compat_len; + @@ -670,10 +759,10 @@ index 908a275736..4376f64a47 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6576287e5b..0ef2bf1b93 
100644 +index cd16cb893d..93203d9b91 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = { +@@ -2190,9 +2190,13 @@ static const CPUCaches epyc_genoa_cache_info = { * PT in VMX operation */ @@ -687,7 +776,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2213,6 +2217,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "phenom", @@ -695,7 +784,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 16, -@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2245,6 +2250,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "core2duo", @@ -703,7 +792,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2287,6 +2293,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm64", @@ -711,7 +800,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2328,6 +2335,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "qemu32", @@ -719,7 +808,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 4, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2342,6 +2350,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm32", @@ -727,7 +816,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2372,6 +2381,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "coreduo", @@ -735,7 
+824,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2405,6 +2415,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "486", @@ -743,7 +832,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 4, -@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2417,6 +2428,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium", @@ -751,7 +840,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 5, -@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2429,6 +2441,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium2", @@ -759,7 +848,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 2, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2441,6 +2454,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium3", @@ -767,7 +856,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 3, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2453,6 +2467,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "athlon", @@ -775,7 +864,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 2, .vendor = CPUID_VENDOR_AMD, .family = 6, -@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2468,6 +2483,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "n270", @@ -783,7 +872,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2493,6 +2509,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Conroe", @@ 
-791,7 +880,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2533,6 +2550,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Penryn", @@ -799,7 +888,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4394,6 +4412,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G1", @@ -807,7 +896,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4414,6 +4433,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G2", @@ -815,7 +904,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4436,6 +4456,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G3", @@ -824,10 +913,10 @@ index 6576287e5b..0ef2bf1b93 100644 .vendor = CPUID_VENDOR_AMD, .family = 16, diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c -index 7237378a7d..7b8a3d5af0 100644 +index 9c791b7b05..b91af5051f 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c -@@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = { +@@ -138,6 +138,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -836,10 +925,10 @@ index 7237378a7d..7b8a3d5af0 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index de531842f6..8d82304609 100644 +index 4ce80555b4..9d41edf01e 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3822,6 +3822,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3711,6 +3711,7 @@ static int 
kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -847,7 +936,7 @@ index de531842f6..8d82304609 100644 kvm_msr_buf_reset(cpu); -@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4065,6 +4066,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -881,5 +970,5 @@ index 78f1cf8186..ac954c9b06 100644 val = qtest_inb(qts, 0x505); g_assert_cmpuint(val, ==, 3); -- -2.39.1 +2.39.3 diff --git a/SOURCES/0011-Enable-make-check.patch b/SOURCES/0011-Enable-make-check.patch index cc91302..54015c0 100644 --- a/SOURCES/0011-Enable-make-check.patch +++ b/SOURCES/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001 +From 66a0510405e5142a1f9e38e0770aa0f10aed3e03 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -24,43 +24,49 @@ Rebase changes (7.0.0): - Remove unnecessary changes in iotest 051 - Remove changes in bios-tables-test.c and prom-env-test.c qtests -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - Disable bcm2835-dma-test (added upstream) -Rebase changes (8.0.0-rc1): +Rebase changes (8.0.0): - Removed chunks for disabling bios-table-test (protected upstream) - -Rebase change (8.0.0-rc2): - Disable new qemu-iotests execution - Revert change in tco qtest (blocking test run) +Rebase changes (8.1.0): +- Do not disable device-plug-test for s390x + +Rebase changes (8.2.0 rc1): +- Remove unneeded hack in qtest/usb-hcd-xhci-test.c + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57 + +Merged patches (8.1.0): +- f468163234 iotests: Use alternative CPU type that is not deprecated in RHEL --- .distro/qemu-kvm.spec.template | 4 ++-- 
tests/avocado/replay_kernel.py | 2 +- tests/avocado/reverse_debugging.py | 2 +- tests/avocado/tcg_plugins.py | 6 ++--- tests/qemu-iotests/meson.build | 34 ++++++++++++++--------------- + tests/qemu-iotests/testenv.py | 3 +++ tests/qtest/fuzz-e1000e-test.c | 2 +- tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/intel-hda-test.c | 2 +- tests/qtest/libqos/meson.build | 2 +- tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 2 -- - tests/qtest/tco-test.c | 2 +- - tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/meson.build | 1 - tests/qtest/virtio-net-failover.c | 1 + - 14 files changed, 35 insertions(+), 32 deletions(-) + 13 files changed, 33 insertions(+), 30 deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py -index f13456e1ec..2fee270a42 100644 +index c37afa662c..61c95a2198 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py -@@ -147,7 +147,7 @@ def test_aarch64_virt(self): +@@ -153,7 +153,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -70,10 +76,10 @@ index f13456e1ec..2fee270a42 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index 680c314cfc..71eccb8fb6 100644 +index 4cce5a5598..e9248a04a2 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py -@@ -206,7 +206,7 @@ def test_aarch64_virt(self): +@@ -230,7 +230,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -83,10 +89,10 @@ index 680c314cfc..71eccb8fb6 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py -index 642d2e49e3..93b3afd823 100644 +index 15fd87b2c1..f0d9d89c93 
100644 --- a/tests/avocado/tcg_plugins.py +++ b/tests/avocado/tcg_plugins.py -@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): +@@ -66,7 +66,7 @@ def test_aarch64_virt_insn(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -95,7 +101,7 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): +@@ -96,7 +96,7 @@ def test_aarch64_virt_insn_icount(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -104,7 +110,7 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): +@@ -126,7 +126,7 @@ def test_aarch64_virt_mem_icount(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -114,7 +120,7 @@ index 642d2e49e3..93b3afd823 100644 kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build -index 9735071a29..32002335f4 100644 +index 53847cb98f..a2abdb650e 100644 --- a/tests/qemu-iotests/meson.build +++ b/tests/qemu-iotests/meson.build @@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats @@ -156,6 +162,20 @@ index 9735071a29..32002335f4 100644 +# suite: suites) +# endforeach endforeach +diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py +index 3ff38f2661..cab9a2bd6c 100644 +--- a/tests/qemu-iotests/testenv.py ++++ b/tests/qemu-iotests/testenv.py +@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, + if self.qemu_prog.endswith(f'qemu-system-{suffix}'): + self.qemu_options += f' -machine {machine}' + ++ if self.qemu_prog.endswith('qemu-system-x86_64'): ++ self.qemu_options += ' -cpu Nehalem' ++ + # 
QEMU_DEFAULT_MACHINE + self.qemu_default_machine = get_default_machine(self.qemu_prog) + diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c index 5052883fb6..b5286f4b12 100644 --- a/tests/qtest/fuzz-e1000e-test.c @@ -183,20 +203,20 @@ index e37b48b2cc..88647da054 100644 qtest_outl(s, 0xcf8, 0x80001811); diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c -index d4a8db6fd6..1a796ec15a 100644 +index 663bb6c485..2efc43e3f7 100644 --- a/tests/qtest/intel-hda-test.c +++ b/tests/qtest/intel-hda-test.c -@@ -38,7 +38,7 @@ static void test_issue542_ich6(void) +@@ -42,7 +42,7 @@ static void test_issue542_ich6(void) { QTestState *s; - s = qtest_init("-nographic -nodefaults -M pc-q35-6.2 " + s = qtest_init("-nographic -nodefaults -M pc-q35-rhel9.0.0 " + AUDIODEV "-device intel-hda,id=" HDA_ID CODEC_DEVICES); - qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index cc209a8de5..42a7c529c9 100644 +index 90aae42a22..9bc4e41af0 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -44,7 +44,7 @@ libqos_srcs = files( @@ -206,8 +226,8 @@ index cc209a8de5..42a7c529c9 100644 - 'virtio-iommu.c', +# 'virtio-iommu.c', 'virtio-gpio.c', + 'virtio-scmi.c', 'generic-pcihost.c', - diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c index 8ac95b89f7..cd2102555c 100644 --- a/tests/qtest/lpc-ich9-test.c @@ -222,10 +242,10 @@ index 8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 85ea4e8d99..893afc8eeb 100644 +index 47dabf91d0..0bdfa3a821 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -94,7 +94,6 @@ qtests_i386 = \ +@@ -97,7 +97,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -233,62 +253,11 @@ index 85ea4e8d99..893afc8eeb 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -223,7 
+222,6 @@ qtests_s390x = \ - (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ - ['boot-serial-test', - 'drive_del-test', -- 'device-plug-test', - 'virtio-ccw-test', - 'cpu-plug-test', - 'migration-test'] -diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c -index 0547d41173..3756ce82d8 100644 ---- a/tests/qtest/tco-test.c -+++ b/tests/qtest/tco-test.c -@@ -60,7 +60,7 @@ static void test_init(TestData *d) - QTestState *qs; - - qs = qtest_initf("-machine q35 %s %s", -- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "", -+ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false", - !d->args ? "" : d->args); - qtest_irq_intercept_in(qs, "ioapic"); - -diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c -index 10ef9d2a91..3855873050 100644 ---- a/tests/qtest/usb-hcd-xhci-test.c -+++ b/tests/qtest/usb-hcd-xhci-test.c -@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) - usb_test_hotplug(global_qtest, "xhci", "1", NULL); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void test_usb_uas_hotplug(void) - { - QTestState *qts = global_qtest; -@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) - qtest_qmp_device_del(qts, "scsihd"); - qtest_qmp_device_del(qts, "uas"); - } -+#endif - - static void test_usb_ccid_hotplug(void) - { -@@ -56,7 +58,9 @@ int main(int argc, char **argv) - - qtest_add_func("/xhci/pci/init", test_xhci_init); - qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); -+#endif - qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); - - qtest_start("-device nec-usb-xhci,id=xhci" diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c -index 4a809590bf..1bf3fa641c 100644 +index 0d40bc1f2d..4c633c1584 100644 --- a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c -@@ -25,6 +25,7 @@ +@@ -26,6 
+26,7 @@ #define PCI_SEL_BASE 0x0010 #define BASE_MACHINE "-M q35 -nodefaults " \ @@ -297,5 +266,5 @@ index 4a809590bf..1bf3fa641c 100644 "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " -- -2.39.1 +2.39.3 diff --git a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 430959b..8222efd 100644 --- a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001 +From a27cfa0b407bd806ce389a7c69d0130bcfd35244 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -26,13 +26,16 @@ Count of slots increased to 509 later so we could increase limit to 64 as some usecases require more than 32 devices. Signed-off-by: Bandan Das + +Rebase changes (231025): +- Update to upstream changes --- - hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.c | 31 ++++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + - 2 files changed, 29 insertions(+), 1 deletion(-) + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index ec9a854361..a779053be3 100644 +index c62c02f7b6..ec98080f28 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -48,6 +48,9 @@ @@ -45,13 +48,20 @@ index ec9a854361..a779053be3 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); -@@ -2854,9 +2857,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - ssize_t len; +@@ -3076,14 +3079,37 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + { + VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; ++ VFIODevice *vbasedev_iter; ++ VFIOGroup 
*group; + char *tmp, *subsys; + Error *err = NULL; struct stat st; - int groupid; - int i, ret; + int ret, i = 0; bool is_mdev; + char uuid[UUID_STR_LEN]; + char *name; + if (device_limit && device_limit != vdev->assigned_device_limit) { + error_setg(errp, "Assigned device limit has been redefined. " @@ -77,7 +87,7 @@ index ec9a854361..a779053be3 100644 if (!vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3501,6 +3527,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -88,10 +98,10 @@ index ec9a854361..a779053be3 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 177abcc8fb..45235d38ba 100644 +index fba8737ab2..eb74d9de2d 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h -@@ -140,6 +140,7 @@ struct VFIOPCIDevice { +@@ -142,6 +142,7 @@ struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); @@ -100,5 +110,5 @@ index 177abcc8fb..45235d38ba 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0013-Add-support-statement-to-help-output.patch b/SOURCES/0013-Add-support-statement-to-help-output.patch index 25db0b8..bc5d9b4 100644 --- a/SOURCES/0013-Add-support-statement-to-help-output.patch +++ b/SOURCES/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001 +From 424f14d123fe1043518758605d94ed5ba50e52ad Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -17,14 +17,14 @@ as unsupported by Red Hat, and advising users to use libvirt instead. 
Signed-off-by: Eduardo Habkost --- - softmmu/vl.c | 9 +++++++++ + system/vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) -diff --git a/softmmu/vl.c b/softmmu/vl.c -index ea20b23e4c..ad4173138d 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -834,9 +834,17 @@ static void version(void) +diff --git a/system/vl.c b/system/vl.c +index 2bcd9efb9a..93635ffc5b 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -870,9 +870,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index ea20b23e4c..ad4173138d 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", g_get_prgname()); -@@ -862,6 +870,7 @@ static void help(int exitcode) +@@ -898,6 +906,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -51,5 +51,5 @@ index ea20b23e4c..ad4173138d 100644 } -- -2.39.1 +2.39.3 diff --git a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index b97c844..7fa10b5 100644 --- a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001 +From c683ff4a770b77dbe707413840918a46f67fa825 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 59bdf67a2c..52b49f1f6a 100644 +index 42fd09e4de..557118cb1f 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3296,11 +3296,11 @@ SRST +@@ -3469,11 +3469,11 @@ SRST :: @@ -57,5 +57,5 @@ index 59bdf67a2c..52b49f1f6a 100644 ``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. 
-- -2.39.1 +2.39.3 diff --git a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 1e2f8e1..667d431 100644 --- a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001 +From 776bff1be5e98982a9bbc8345ff27274ff5b8c0f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 30fd53fa64..22084730f9 100644 +index 13e032bd5e..7968735346 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1337,6 +1337,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, +@@ -1358,6 +1358,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOTSUP; goto fail; } @@ -61,7 +61,7 @@ index 30fd53fa64..22084730f9 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index 6b32c7fbfa..6ddda2ee64 100644 +index 2846c83808..83472953a2 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -83,6 +83,7 @@ _filter_qemu() @@ -73,5 +73,5 @@ index 6b32c7fbfa..6ddda2ee64 100644 } -- -2.39.1 +2.39.3 diff --git a/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch b/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch new file mode 100644 index 0000000..4e62baa --- /dev/null +++ b/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch @@ -0,0 +1,44 @@ +From 3b9b38339346ebfaf3e8ddf0822eba1cc9e78408 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 14 Dec 2023 04:42:01 -0500 +Subject: Introduce RHEL 9.4.0 qemu-kvm machine 
type for aarch64 + +Jira: https://issues.redhat.com/browse/RHEL-17168 + +Adding new machine type to support enabling new features. + +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c541efee5e..0b17c94ad7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3630,14 +3630,21 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + ++static void rhel940_virt_options(MachineClass *mc) ++{ ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) ++ + static void rhel920_virt_options(MachineClass *mc) + { ++ rhel940_virt_options(mc); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); + } +-DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++DEFINE_RHEL_MACHINE(9, 2, 0) + + static void rhel900_virt_options(MachineClass *mc) + { +-- +2.39.3 + diff --git a/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch b/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch deleted file mode 100644 index bb9455a..0000000 --- a/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001 -From: Kfir Manor -Date: Sun, 22 Jan 2023 17:33:07 +0200 -Subject: qga/linux: add usb support to guest-get-fsinfo - -RH-Author: Kostiantyn Kostiuk -RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo -RH-Bugzilla: 2149191 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: yvugenfi -RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 (kostyanf14/redhat_centos-stream_src_qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191 -Upstream patch: 
https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/ - -Signed-off-by: Kfir Manor -Reviewed-by: Konstantin Kostiuk -Signed-off-by: Konstantin Kostiuk - -Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -Patch-id: 72 -Patch-present-in-specfile: True ---- - qga/commands-posix.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 079689d79a..97754930c1 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - g_str_equal(driver, "sym53c8xx") || - g_str_equal(driver, "virtio-pci") || - g_str_equal(driver, "ahci") || -- g_str_equal(driver, "nvme"))) { -+ g_str_equal(driver, "nvme") || -+ g_str_equal(driver, "xhci_hcd") || -+ g_str_equal(driver, "ehci-pci"))) { - break; - } - -@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - } - } else if (strcmp(driver, "nvme") == 0) { - disk->bus_type = GUEST_DISK_BUS_TYPE_NVME; -+ } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) { -+ disk->bus_type = GUEST_DISK_BUS_TYPE_USB; - } else { - g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath); - goto cleanup; --- -2.39.1 - diff --git a/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch b/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch deleted file mode 100644 index ce0ba5c..0000000 --- a/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch +++ /dev/null @@ -1,110 +0,0 @@ -From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 15 Feb 2023 02:03:17 -0500 -Subject: Add RHEL 9.2.0 compat structure - -Adding compatibility bits necessary to keep 9.2.0 machine -types same after rebase to 8.0. 
- -Signed-off-by: Miroslav Rezanina - -Rebase notes (8.0.0 rc4): -- Added migration.x-preempt-pre-7-2 compat) ---- - hw/arm/virt.c | 1 + - hw/core/machine.c | 10 ++++++++++ - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 3 +++ - hw/s390x/s390-virtio-ccw.c | 1 + - include/hw/boards.h | 3 +++ - 6 files changed, 20 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 1ae1654be5..9be53e9355 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init); - static void rhel920_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); - } - DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5aa567fad3..0e0120b7f2 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); - const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - -+GlobalProperty hw_compat_rhel_9_2[] = { -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "e1000e", "migrate-timadj", "off" }, -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "virtio-mem", "x-early-migration", "false" }, -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "migration", "x-preempt-pre-7-2", "true" }, -+}; -+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); -+ - /* - * Mostly the same as hw_compat_7_0 - */ -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 3e330fd36f..90fb6e2e03 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; - pcmc->enforce_amd_1tb_hole = false; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_2, -+ hw_compat_rhel_9_2_len); - compat_props_add(m->compat_props, 
hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 98601bb76f..8945b69175 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) - m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.2.0"; -+ -+ compat_props_add(m->compat_props, hw_compat_rhel_9_2, -+ hw_compat_rhel_9_2_len); - } - - DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index dcd3b966b0..6a0b93c63d 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine) - - static void ccw_machine_rhel920_class_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); - } - DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 5e7446ee40..5f08bd7550 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_9_2[]; -+extern const size_t hw_compat_rhel_9_2_len; -+ - extern GlobalProperty hw_compat_rhel_9_1[]; - extern const size_t hw_compat_rhel_9_1_len; - --- -2.39.1 - diff --git a/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch b/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch deleted file mode 100644 index 81993e9..0000000 --- a/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch +++ /dev/null @@ -1,76 +0,0 @@ -From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001 -From: Thomas Huth 
-Date: Mon, 27 Mar 2023 15:14:03 +0200 -Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU - 8.0.0 update - -Add pc_rhel_9_2_compat based on upstream pc_compat_7_2. - -Signed-off-by: Thomas Huth ---- - hw/i386/pc.c | 6 ++++++ - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 2 ++ - include/hw/i386/pc.h | 3 +++ - 4 files changed, 13 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 8abb1f872e..f216922cee 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_9_2_compat[] = { -+ /* pc_rhel_9_2_compat from pc_compat_7_2 */ -+ { "ICH9-LPC", "noreboot", "true" }, -+}; -+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); -+ - GlobalProperty pc_rhel_9_0_compat[] = { - /* pc_rhel_9_0_compat from pc_compat_6_2 */ - { "virtio-mem", "unplugged-inaccessible", "off" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 90fb6e2e03..fc704d783f 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_2, - hw_compat_rhel_9_2_len); -+ compat_props_add(m->compat_props, pc_rhel_9_2_compat, -+ pc_rhel_9_2_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 8945b69175..e97655616a 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) - - compat_props_add(m->compat_props, hw_compat_rhel_9_2, - hw_compat_rhel_9_2_len); -+ compat_props_add(m->compat_props, pc_rhel_9_2_compat, -+ pc_rhel_9_2_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", 
pc_q35_init_rhel920, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 4376f64a47..d218ad1628 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type); - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_9_2_compat[]; -+extern const size_t pc_rhel_9_2_compat_len; -+ - extern GlobalProperty pc_rhel_9_0_compat[]; - extern const size_t pc_rhel_9_0_compat_len; - --- -2.39.1 - diff --git a/SOURCES/0019-Disable-unwanted-new-devices.patch b/SOURCES/0019-Disable-unwanted-new-devices.patch deleted file mode 100644 index f656ca9..0000000 --- a/SOURCES/0019-Disable-unwanted-new-devices.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 17 Apr 2023 01:24:18 -0400 -Subject: Disable unwanted new devices - -QEMU 8.0 adds two new device we do not want to support that can't -be disabled using configure switch. - -1) ide-cf - virtual CompactFlash card - -2) i2c-echo - testing echo device - -Use manual disabling of the device by changing code (1) and meson configs (2). 
- -Signed-off-by: Miroslav Rezanina ---- - hw/ide/qdev.c | 9 +++++++++ - hw/misc/meson.build | 3 ++- - 2 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c -index 1b3b4da01d..454bfa5783 100644 ---- a/hw/ide/qdev.c -+++ b/hw/ide/qdev.c -@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) - ide_dev_initfn(dev, IDE_CD, errp); - } - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static void ide_cf_realize(IDEDevice *dev, Error **errp) - { - ide_dev_initfn(dev, IDE_CFATA, errp); - } -+#endif - - #define DEFINE_IDE_DEV_PROPERTIES() \ - DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ -@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { - .class_init = ide_cd_class_init, - }; - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static Property ide_cf_properties[] = { - DEFINE_IDE_DEV_PROPERTIES(), - DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), -@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { - .instance_size = sizeof(IDEDrive), - .class_init = ide_cf_class_init, - }; -+#endif - - static void ide_device_class_init(ObjectClass *klass, void *data) - { -@@ -396,7 +402,10 @@ static void ide_register_types(void) - type_register_static(&ide_bus_info); - type_register_static(&ide_hd_info); - type_register_static(&ide_cd_info); -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - type_register_static(&ide_cf_info); -+#endif - type_register_static(&ide_device_type_info); - } - -diff --git a/hw/misc/meson.build b/hw/misc/meson.build -index a40245ad44..9cc5a61ed7 100644 ---- a/hw/misc/meson.build -+++ b/hw/misc/meson.build -@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c')) - - softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c')) - --softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) -+# Disabled for Red Hat Enterprise Linux -+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) - - 
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c')) - --- -2.39.1 - diff --git a/SOURCES/README.tests b/SOURCES/README.tests index 9932773..739e2c6 100644 --- a/SOURCES/README.tests +++ b/SOURCES/README.tests @@ -28,7 +28,7 @@ avocado_qemu tests: The avocado_qemu tests can be executed by running the following avocado command: avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/ Avocado needs to be installed separately using either pip or from source as -Avocado is not being packaged for RHEL-8. +Avocado is not being packaged for RHEL. qemu-iotests: symlinks to corresponding binaries need to be created for QEMU_PROG, @@ -36,4 +36,4 @@ QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be executed. The primary purpose of this package is to make these tests available to be -executed as gating tests for the virt module in the RHEL-8 OSCI environment. +executed as gating tests for the qemu-kvm in the RHEL OSCI environment. diff --git a/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch b/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch new file mode 100644 index 0000000..ed776c0 --- /dev/null +++ b/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch @@ -0,0 +1,37 @@ +From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 11 Jan 2024 12:26:19 -0500 +Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm) + +Upstream: RHEL only + +Compiles the IOMMUFD object on aarch64 to be able to use +the IOMMUFD VFIO backend. 
+ +Signed-off-by: Eric Auger +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index aec1831199..b0191d3c69 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -39,3 +39,4 @@ CONFIG_PXB=y + CONFIG_VHOST_VSOCK=y + CONFIG_VHOST_USER_VSOCK=y + CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y +-- +2.39.3 + diff --git a/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch b/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch new file mode 100644 index 0000000..9a98477 --- /dev/null +++ b/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch @@ -0,0 +1,37 @@ +From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 11 Jan 2024 12:33:17 -0500 +Subject: [PATCH 067/101] Compile IOMMUFD on s390x +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm) + +Upstream: RHEL only + +Compiles the IOMMUFD object on s390x to be able to use +the IOMMUFD VFIO backend. 
+ +Signed-off-by: Eric Auger +--- + configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +index 69a799adbd..24cf6dbd03 100644 +--- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y + CONFIG_VHOST_VSOCK=y + CONFIG_VHOST_USER_VSOCK=y + CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y +-- +2.39.3 + diff --git a/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch b/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch new file mode 100644 index 0000000..a3eb40e --- /dev/null +++ b/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch @@ -0,0 +1,37 @@ +From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 11 Jan 2024 12:34:44 -0500 +Subject: [PATCH 068/101] Compile IOMMUFD on x86_64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm) + +Upstream: RHEL only + +Compiles the IOMMUFD object on s390x to be able to use +the IOMMUFD VFIO backend. 
+ +Signed-off-by: Eric Auger +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index ce5be73633..ba41108e0c 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -108,3 +108,4 @@ CONFIG_SGX=y + CONFIG_VHOST_VSOCK=y + CONFIG_VHOST_USER_VSOCK=y + CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y +-- +2.39.3 + diff --git a/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch b/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch new file mode 100644 index 0000000..31439d7 --- /dev/null +++ b/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch @@ -0,0 +1,155 @@ +From 5c639f8ce65183ce8e44ee8e0230e9d627a440d7 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Wed, 21 Feb 2024 17:00:27 +0000 +Subject: [PATCH 05/20] Implement SMBIOS type 9 v2.6 + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [3/18] ead230527d93938907a561cf5b985ee4f54d82b1 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 +Author: Felix Wu + + Signed-off-by: Felix Wu + Signed-off-by: Nabih Estefan + Message-Id: <20240221170027.1027325-3-nabihestefan@google.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit 04f143d828845d0fd52dd4a52664d81a4f5431f7) +Signed-off-by: Igor Mammedov +--- + hw/smbios/smbios.c | 49 +++++++++++++++++++++++++++++++++--- + include/hw/firmware/smbios.h | 4 +++ + qemu-options.hx | 2 +- + 3 files changed, 51 insertions(+), 4 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 4f5637d445..074705fa4c 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -124,7 +124,7 @@ static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8); + + /* type 9 instance for parsing */ + struct type9_instance { +- const char *slot_designation; ++ const char *slot_designation, *pcidev; + uint8_t slot_type, slot_data_bus_width, current_usage, slot_length, + slot_characteristics1, slot_characteristics2; + uint16_t slot_id; +@@ -427,6 +427,11 @@ static const QemuOptDesc qemu_smbios_type9_opts[] = { + .type = QEMU_OPT_NUMBER, + .help = "slot characteristics2, see the spec", + }, ++ { ++ .name = "pci_device", ++ .type = QEMU_OPT_STRING, ++ .help = "PCI device, if provided." ++ } + }; + + static const QemuOptDesc qemu_smbios_type11_opts[] = { +@@ -851,7 +856,7 @@ static void smbios_build_type_8_table(void) + } + } + +-static void smbios_build_type_9_table(void) ++static void smbios_build_type_9_table(Error **errp) + { + unsigned instance = 0; + struct type9_instance *t9; +@@ -868,6 +873,43 @@ static void smbios_build_type_9_table(void) + t->slot_characteristics1 = t9->slot_characteristics1; + t->slot_characteristics2 = t9->slot_characteristics2; + ++ if (t9->pcidev) { ++ PCIDevice *pdev = NULL; ++ int rc = pci_qdev_find_device(t9->pcidev, &pdev); ++ if (rc != 0) { ++ error_setg(errp, ++ "No PCI device %s for SMBIOS type 9 entry %s", ++ t9->pcidev, t9->slot_designation); ++ return; ++ } ++ /* ++ * We only handle the case were the device is attached to ++ * the PCI root bus. 
The general case is more complex as ++ * bridges are enumerated later and the table would need ++ * to be updated at this moment. ++ */ ++ if (!pci_bus_is_root(pci_get_bus(pdev))) { ++ error_setg(errp, ++ "Cannot create type 9 entry for PCI device %s: " ++ "not attached to the root bus", ++ t9->pcidev); ++ return; ++ } ++ t->segment_group_number = cpu_to_le16(0); ++ t->bus_number = pci_dev_bus_num(pdev); ++ t->device_number = pdev->devfn; ++ } else { ++ /* ++ * Per SMBIOS spec, For slots that are not of the PCI, AGP, PCI-X, ++ * or PCI-Express type that do not have bus/device/function ++ * information, 0FFh should be populated in the fields of Segment ++ * Group Number, Bus Number, Device/Function Number. ++ */ ++ t->segment_group_number = 0xff; ++ t->bus_number = 0xff; ++ t->device_number = 0xff; ++ } ++ + SMBIOS_BUILD_TABLE_POST; + instance++; + } +@@ -1222,7 +1264,7 @@ void smbios_get_tables(MachineState *ms, + } + + smbios_build_type_8_table(); +- smbios_build_type_9_table(); ++ smbios_build_type_9_table(errp); + smbios_build_type_11_table(); + + #define MAX_DIMM_SZ (16 * GiB) +@@ -1568,6 +1610,7 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + t->slot_id = qemu_opt_get_number(opts, "slot_id", 0); + t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0); + t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0); ++ save_opt(&t->pcidev, opts, "pcidev"); + QTAILQ_INSERT_TAIL(&type9, t, next); + return; + } +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 6bbd5a4c20..f8dd07fe4c 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -222,6 +222,10 @@ struct smbios_type_9 { + uint16_t slot_id; + uint8_t slot_characteristics1; + uint8_t slot_characteristics2; ++ /* SMBIOS spec v2.6+ */ ++ uint16_t segment_group_number; ++ uint8_t bus_number; ++ uint8_t device_number; + } QEMU_PACKED; + + /* SMBIOS type 11 - OEM strings */ +diff --git 
a/qemu-options.hx b/qemu-options.hx +index 94cacc2c63..93364e1765 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2710,7 +2710,7 @@ SRST + ``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]`` + Specify SMBIOS type 4 fields + +-``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]`` ++``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d][,pci_device=str]`` + Specify SMBIOS type 9 fields + + ``-smbios type=11[,value=str][,path=filename]`` +-- +2.39.3 + diff --git a/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch b/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch new file mode 100644 index 0000000..89466c7 --- /dev/null +++ b/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch @@ -0,0 +1,218 @@ +From 84fc607d678bd72397a41d706e91fa241fd97266 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Wed, 21 Feb 2024 17:00:26 +0000 +Subject: [PATCH 04/20] Implement base of SMBIOS type 9 descriptor. + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [2/18] 2678cc080bfbf3357fa2f94ceaf42fc61b690d32 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +commit: 735eee07d1f963635d3c3bf9f5e4bf1bc000870e +Author: Felix Wu + + Version 2.1+. + + Signed-off-by: Felix Wu + Signed-off-by: Nabih Estefan + Message-Id: <20240221170027.1027325-2-nabihestefan@google.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Igor Mammedov +--- + hw/smbios/smbios.c | 99 ++++++++++++++++++++++++++++++++++++ + include/hw/firmware/smbios.h | 13 +++++ + qemu-options.hx | 3 ++ + 3 files changed, 115 insertions(+) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 7bde23e59d..4f5637d445 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -122,6 +122,16 @@ struct type8_instance { + }; + static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8); + ++/* type 9 instance for parsing */ ++struct type9_instance { ++ const char *slot_designation; ++ uint8_t slot_type, slot_data_bus_width, current_usage, slot_length, ++ slot_characteristics1, slot_characteristics2; ++ uint16_t slot_id; ++ QTAILQ_ENTRY(type9_instance) next; ++}; ++static QTAILQ_HEAD(, type9_instance) type9 = QTAILQ_HEAD_INITIALIZER(type9); ++ + static struct { + size_t nvalues; + char **values; +@@ -371,6 +381,54 @@ static const QemuOptDesc qemu_smbios_type8_opts[] = { + }, + }; + ++static const QemuOptDesc qemu_smbios_type9_opts[] = { ++ { ++ .name = "type", ++ .type = QEMU_OPT_NUMBER, ++ .help = "SMBIOS element type", ++ }, ++ { ++ .name = "slot_designation", ++ .type = QEMU_OPT_STRING, ++ .help = "string number for reference designation", ++ }, ++ { ++ .name = "slot_type", ++ .type = QEMU_OPT_NUMBER, ++ .help = "connector type", ++ }, ++ { ++ .name = "slot_data_bus_width", ++ .type = QEMU_OPT_NUMBER, ++ .help = "port type", ++ }, ++ { ++ .name = "current_usage", ++ .type = QEMU_OPT_NUMBER, ++ .help = "current usage", ++ }, ++ { ++ .name = "slot_length", ++ .type = QEMU_OPT_NUMBER, ++ .help = "system slot length", ++ }, ++ { ++ .name = "slot_id", ++ .type = QEMU_OPT_NUMBER, ++ .help = "system slot id", ++ }, ++ { ++ .name = "slot_characteristics1", ++ .type = QEMU_OPT_NUMBER, ++ .help = "slot characteristics1, see the spec", ++ }, ++ { ++ .name = "slot_characteristics2", ++ .type = QEMU_OPT_NUMBER, ++ .help = "slot characteristics2, see the spec", ++ }, ++}; ++ + 
static const QemuOptDesc qemu_smbios_type11_opts[] = { + { + .name = "value", +@@ -594,6 +652,7 @@ bool smbios_skip_table(uint8_t type, bool required_table) + #define T2_BASE 0x200 + #define T3_BASE 0x300 + #define T4_BASE 0x400 ++#define T9_BASE 0x900 + #define T11_BASE 0xe00 + + #define T16_BASE 0x1000 +@@ -792,6 +851,28 @@ static void smbios_build_type_8_table(void) + } + } + ++static void smbios_build_type_9_table(void) ++{ ++ unsigned instance = 0; ++ struct type9_instance *t9; ++ ++ QTAILQ_FOREACH(t9, &type9, next) { ++ SMBIOS_BUILD_TABLE_PRE(9, T9_BASE + instance, true); ++ ++ SMBIOS_TABLE_SET_STR(9, slot_designation, t9->slot_designation); ++ t->slot_type = t9->slot_type; ++ t->slot_data_bus_width = t9->slot_data_bus_width; ++ t->current_usage = t9->current_usage; ++ t->slot_length = t9->slot_length; ++ t->slot_id = t9->slot_id; ++ t->slot_characteristics1 = t9->slot_characteristics1; ++ t->slot_characteristics2 = t9->slot_characteristics2; ++ ++ SMBIOS_BUILD_TABLE_POST; ++ instance++; ++ } ++} ++ + static void smbios_build_type_11_table(void) + { + char count_str[128]; +@@ -1141,6 +1222,7 @@ void smbios_get_tables(MachineState *ms, + } + + smbios_build_type_8_table(); ++ smbios_build_type_9_table(); + smbios_build_type_11_table(); + + #define MAX_DIMM_SZ (16 * GiB) +@@ -1472,6 +1554,23 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + t8_i->port_type = qemu_opt_get_number(opts, "port_type", 0); + QTAILQ_INSERT_TAIL(&type8, t8_i, next); + return; ++ case 9: { ++ if (!qemu_opts_validate(opts, qemu_smbios_type9_opts, errp)) { ++ return; ++ } ++ struct type9_instance *t; ++ t = g_new0(struct type9_instance, 1); ++ save_opt(&t->slot_designation, opts, "slot_designation"); ++ t->slot_type = qemu_opt_get_number(opts, "slot_type", 0); ++ t->slot_data_bus_width = qemu_opt_get_number(opts, "slot_data_bus_width", 0); ++ t->current_usage = qemu_opt_get_number(opts, "current_usage", 0); ++ t->slot_length = qemu_opt_get_number(opts, "slot_length", 0); ++ 
t->slot_id = qemu_opt_get_number(opts, "slot_id", 0); ++ t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0); ++ t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0); ++ QTAILQ_INSERT_TAIL(&type9, t, next); ++ return; ++ } + case 11: + if (!qemu_opts_validate(opts, qemu_smbios_type11_opts, errp)) { + return; +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index d24b3ccd32..6bbd5a4c20 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -211,6 +211,19 @@ struct smbios_type_8 { + uint8_t port_type; + } QEMU_PACKED; + ++/* SMBIOS type 9 - System Slots (v2.1+) */ ++struct smbios_type_9 { ++ struct smbios_structure_header header; ++ uint8_t slot_designation; ++ uint8_t slot_type; ++ uint8_t slot_data_bus_width; ++ uint8_t current_usage; ++ uint8_t slot_length; ++ uint16_t slot_id; ++ uint8_t slot_characteristics1; ++ uint8_t slot_characteristics2; ++} QEMU_PACKED; ++ + /* SMBIOS type 11 - OEM strings */ + struct smbios_type_11 { + struct smbios_structure_header header; +diff --git a/qemu-options.hx b/qemu-options.hx +index 0814f43066..94cacc2c63 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2710,6 +2710,9 @@ SRST + ``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]`` + Specify SMBIOS type 4 fields + ++``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]`` ++ Specify SMBIOS type 9 fields ++ + ``-smbios type=11[,value=str][,path=filename]`` + Specify SMBIOS type 11 fields + +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch b/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch new file mode 100644 index 0000000..bc0f15a --- /dev/null +++ 
b/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch @@ -0,0 +1,60 @@ +From 2e0e4355b2d4edb66b7d8c198339e17940abd682 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 13:03:19 +0000 +Subject: [PATCH 2/3] Revert "chardev/char-socket: Fix TLS io channels sending + too much data to the backend" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [2/3] 1cb3d72b86ced0f70a09dfa0d325ae8a85db1b2b (berrange/centos-src-qemu) + +This commit results in unexpected termination of the TLS connection. +When 'fd_can_read' returns 0, the code goes on to pass a zero length +buffer to qio_channel_read. The TLS impl calls into gnutls_recv() +with this zero length buffer, at which point GNUTLS returns an error +GNUTLS_E_INVALID_REQUEST. This is treated as fatal by QEMU's TLS code +resulting in the connection being torn down by the chardev. + +Simply skipping the qio_channel_read when the buffer length is zero +is also not satisfactory, as it results in a high CPU burn busy loop +massively slowing QEMU's functionality. + +The proper solution is to avoid tcp_chr_read being called at all +unless the frontend is able to accept more data. This will be done +in a followup commit. + +This reverts commit 462945cd22d2bcd233401ed3aa167d83a8e35b05 + +Reviewed-by: Thomas Huth +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit e8ee827ffdb86ebbd5f5213a1f78123c25a90864) +--- + chardev/char-socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index f48d341ebc..51d0943fce 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) + s->max_size <= 0) { + return TRUE; + } +- len = tcp_chr_read_poll(opaque); +- if (len > sizeof(buf)) { +- len = sizeof(buf); ++ len = sizeof(buf); ++ if (len > s->max_size) { ++ len = s->max_size; + } + size = tcp_chr_recv(chr, (void *)buf, len); + if (size == 0 || (size == -1 && errno != EAGAIN)) { +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch b/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch new file mode 100644 index 0000000..135afbe --- /dev/null +++ b/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch @@ -0,0 +1,216 @@ +From ab5a33d57b48e35388928e388bb6e6479bc77651 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 17:08:30 +0000 +Subject: [PATCH 3/3] Revert "chardev: use a child source for qio input source" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [3/3] b58e6c19c2b11d5d28db31cf1386226fb01d195e (berrange/centos-src-qemu) + +This reverts commit a7077b8e354d90fec26c2921aa2dea85b90dff90, +and add comments to explain why child sources cannot be used. + +When a GSource is added as a child of another GSource, if its +'prepare' function indicates readiness, then the parent's +'prepare' function will never be run. 
The io_watch_poll_prepare +absolutely *must* be run on every iteration of the main loop, +to ensure that the chardev backend doesn't feed data to the +frontend that it is unable to consume. + +At the time a7077b8e354d90fec26c2921aa2dea85b90dff90 was made, +all the child GSource impls were relying on poll'ing an FD, +so their 'prepare' functions would never indicate readiness +ahead of poll() being invoked. So the buggy behaviour was +not noticed and lay dormant. + +Relatively recently the QIOChannelTLS impl introduced a +level 2 child GSource, which checks with GNUTLS whether it +has cached any data that was decoded but not yet consumed: + + commit ffda5db65aef42266a5053a4be34515106c4c7ee + Author: Antoine Damhet + Date: Tue Nov 15 15:23:29 2022 +0100 + + io/channel-tls: fix handling of bigger read buffers + + Since the TLS backend can read more data from the underlying QIOChannel + we introduce a minimal child GSource to notify if we still have more + data available to be read. + + Signed-off-by: Antoine Damhet + Signed-off-by: Charles Frey + Signed-off-by: Daniel P. Berrangé + +With this, it is now quite common for the 'prepare' function +on a QIOChannelTLS GSource to indicate immediate readiness, +bypassing the parent GSource 'prepare' function. IOW, the +critical 'io_watch_poll_prepare' is being skipped on some +iterations of the main loop. As a result chardev frontend +asserts are now being triggered as they are fed data they +are not ready to consume. 
+ +A reproducer is as follows: + + * In terminal 1 run a GNUTLS *echo* server + + $ gnutls-serv --echo \ + --x509cafile ca-cert.pem \ + --x509keyfile server-key.pem \ + --x509certfile server-cert.pem \ + -p 9000 + + * In terminal 2 run a QEMU guest + + $ qemu-system-s390x \ + -nodefaults \ + -display none \ + -object tls-creds-x509,id=tls0,dir=$PWD,endpoint=client \ + -chardev socket,id=con0,host=localhost,port=9000,tls-creds=tls0 \ + -device sclpconsole,chardev=con0 \ + -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 + +After the previous patch revert, but before this patch revert, +this scenario will crash: + + qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion + `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. + +This assert indicates that 'tcp_chr_read' was called without +'tcp_chr_read_poll' having first been checked for ability to +receive more data + +QEMU's use of a 'prepare' function to create/delete another +GSource is rather a hack and not normally the kind of thing that +is expected to be done by a GSource. There is no mechanism to +force GLib to always run the 'prepare' function of a parent +GSource. The best option is to simply not use the child source +concept, and go back to the functional approach previously +relied on. + +Reviewed-by: Marc-André Lureau +Reviewed-by: Thomas Huth +Tested-by: Thomas Huth +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit 038b4217884c6f297278bb1ec6f0463c6c8221de) +--- + chardev/char-io.c | 56 ++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 51 insertions(+), 5 deletions(-) + +diff --git a/chardev/char-io.c b/chardev/char-io.c +index 4451128cba..dab77b112e 100644 +--- a/chardev/char-io.c ++++ b/chardev/char-io.c +@@ -33,6 +33,7 @@ typedef struct IOWatchPoll { + IOCanReadHandler *fd_can_read; + GSourceFunc fd_read; + void *opaque; ++ GMainContext *context; + } IOWatchPoll; + + static IOWatchPoll *io_watch_poll_from_source(GSource *source) +@@ -50,28 +51,59 @@ static gboolean io_watch_poll_prepare(GSource *source, + return FALSE; + } + ++ /* ++ * We do not register the QIOChannel watch as a child GSource. ++ * The 'prepare' function on the parent GSource will be ++ * skipped if a child GSource's 'prepare' function indicates ++ * readiness. We need this prepare function be guaranteed ++ * to run on *every* iteration of the main loop, because ++ * it is critical to ensure we remove the QIOChannel watch ++ * if 'fd_can_read' indicates the frontend cannot receive ++ * more data. 
++ */ + if (now_active) { + iwp->src = qio_channel_create_watch( + iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); + g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); +- g_source_add_child_source(source, iwp->src); +- g_source_unref(iwp->src); ++ g_source_attach(iwp->src, iwp->context); + } else { +- g_source_remove_child_source(source, iwp->src); ++ g_source_destroy(iwp->src); ++ g_source_unref(iwp->src); + iwp->src = NULL; + } + return FALSE; + } + ++static gboolean io_watch_poll_check(GSource *source) ++{ ++ return FALSE; ++} ++ + static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback, + gpointer user_data) + { +- return G_SOURCE_CONTINUE; ++ abort(); ++} ++ ++static void io_watch_poll_finalize(GSource *source) ++{ ++ /* ++ * Due to a glib bug, removing the last reference to a source ++ * inside a finalize callback causes recursive locking (and a ++ * deadlock). This is not a problem inside other callbacks, ++ * including dispatch callbacks, so we call io_remove_watch_poll ++ * to remove this source. At this point, iwp->src must ++ * be NULL, or we would leak it. 
++ */ ++ IOWatchPoll *iwp = io_watch_poll_from_source(source); ++ assert(iwp->src == NULL); + } + + static GSourceFuncs io_watch_poll_funcs = { + .prepare = io_watch_poll_prepare, ++ .check = io_watch_poll_check, + .dispatch = io_watch_poll_dispatch, ++ .finalize = io_watch_poll_finalize, + }; + + GSource *io_add_watch_poll(Chardev *chr, +@@ -91,6 +123,7 @@ GSource *io_add_watch_poll(Chardev *chr, + iwp->ioc = ioc; + iwp->fd_read = (GSourceFunc) fd_read; + iwp->src = NULL; ++ iwp->context = context; + + name = g_strdup_printf("chardev-iowatch-%s", chr->label); + g_source_set_name((GSource *)iwp, name); +@@ -101,10 +134,23 @@ GSource *io_add_watch_poll(Chardev *chr, + return (GSource *)iwp; + } + ++static void io_remove_watch_poll(GSource *source) ++{ ++ IOWatchPoll *iwp; ++ ++ iwp = io_watch_poll_from_source(source); ++ if (iwp->src) { ++ g_source_destroy(iwp->src); ++ g_source_unref(iwp->src); ++ iwp->src = NULL; ++ } ++ g_source_destroy(&iwp->parent); ++} ++ + void remove_fd_in_watch(Chardev *chr) + { + if (chr->gsource) { +- g_source_destroy(chr->gsource); ++ io_remove_watch_poll(chr->gsource); + chr->gsource = NULL; + } + } +-- +2.39.3 + diff --git a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch b/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch deleted file mode 100644 index b937d27..0000000 --- a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 18 Apr 2023 11:04:49 +0200 -Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests - -RH-Author: Igor Mammedov -RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests -RH-Bugzilla: 2087047 -RH-Acked-by: Ani Sinha -RH-Acked-by: Julia Suvorova -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam) - -with Q35 using ACPI PCI 
hotplug by default, user's request to unplug -device is ignored when it's issued before guest OS has been booted. -And any additional attempt to request device hot-unplug afterwards -results in following error: - - "Device XYZ is already in the process of unplug" - -arguably it can be considered as a regression introduced by [2], -before which it was possible to issue unplug request multiple -times. - -Accept new uplug requests after timeout (1ms). This brings ACPI PCI -hotplug on par with native PCIe unplug behavior [1] and allows user -to repeat unplug requests at propper times. -Set expire timeout to arbitrary 1msec so user won't be able to -flood guest with SCI interrupts by calling device_del in tight loop. - -PS: -ACPI spec doesn't mandate what OSPM can do with GPEx.status -bits set before it's booted => it's impl. depended. -Status bits may be retained (I tested with one Windows version) -or cleared (Linux since 2.6 kernel times) during guest's ACPI -subsystem initialization. -Clearing status bits (though not wrong per se) hides the unplug -event from guest, and it's upto user to repeat device_del later -when guest is able to handle unplug requests. - -1) 18416c62e3 ("pcie: expire pending delete") -2) -Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del") -Signed-off-by: Igor Mammedov -Acked-by: Gerd Hoffmann -CC: mst@redhat.com -CC: anisinha@redhat.com -CC: jusual@redhat.com -CC: kraxel@redhat.com -Message-Id: <20230418090449.2155757-1-imammedo@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Ani Sinha -(cherry picked from commit 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d) -Signed-off-by: Igor Mammedov ---- - hw/acpi/pcihp.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c -index dcfb779a7a..cdd6f775a1 100644 ---- a/hw/acpi/pcihp.c -+++ b/hw/acpi/pcihp.c -@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, - * acpi_pcihp_eject_slot() when the operation is completed. - */ - pdev->qdev.pending_deleted_event = true; -+ /* if unplug was requested before OSPM is initialized, -+ * linux kernel will clear GPE0.sts[] bits during boot, which effectively -+ * hides unplug event. And than followup qmp_device_del() calls remain -+ * blocked by above flag permanently. -+ * Unblock qmp_device_del() by setting expire limit, so user can -+ * repeat unplug request later when OSPM has been booted. -+ */ -+ pdev->qdev.pending_deleted_expires_ms = -+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */ -+ - s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); - acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); - } --- -2.39.1 - diff --git a/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch b/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch new file mode 100644 index 0000000..f30b81f --- /dev/null +++ b/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch @@ -0,0 +1,60 @@ +From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:01 -0500 +Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release() + a no-op + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm) + +aio_context_acquire()/aio_context_release() has been replaced by 
+fine-grained locking to protect state shared by multiple threads. The +AioContext lock still plays the role of balancing locking in +AIO_WAIT_WHILE() and many functions in QEMU either require that the +AioContext lock is held or not held for this reason. In other words, the +AioContext lock is purely there for consistency with itself and serves +no real purpose anymore. + +Stop actually acquiring/releasing the lock in +aio_context_acquire()/aio_context_release() so that subsequent patches +can remove callers across the codebase incrementally. + +I have performed "make check" and qemu-iotests stress tests across +x86-64, ppc64le, and aarch64 to confirm that there are no failures as a +result of eliminating the lock. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Acked-by: Kevin Wolf +Message-ID: <20231205182011.1976568-5-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + util/async.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 8f90ddc304..04ee83d220 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx) + + void aio_context_acquire(AioContext *ctx) + { +- qemu_rec_mutex_lock(&ctx->lock); ++ /* TODO remove this function */ + } + + void aio_context_release(AioContext *ctx) + { +- qemu_rec_mutex_unlock(&ctx->lock); ++ /* TODO remove this function */ + } + + QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) +-- +2.39.3 + diff --git a/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch b/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch new file mode 100644 index 0000000..a64e246 --- /dev/null +++ b/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch @@ -0,0 +1,102 @@ +From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:07 -0500 +Subject: [PATCH 090/101] aio: remove + 
aio_context_acquire()/aio_context_release() API + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm) + +Delete these functions because nothing calls these functions anymore. + +I introduced these APIs in commit 98563fc3ec44 ("aio: add +aio_context_acquire() and aio_context_release()") in 2014. It's with a +sigh of relief that I delete these APIs almost 10 years later. + +Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an +understanding of where the code needed to go in order to remove the +limitations that the original dataplane and the IOThread/AioContext +approach that followed it. + +Emanuele Giuseppe Esposito had the splendid determination to convert +large parts of the codebase so that they no longer needed the AioContext +lock. This was a painstaking process, both in the actual code changes +required and the iterations of code review that Emanuele eked out of +Kevin and me over many months. + +Kevin Wolf tackled multitudes of graph locking conversions to protect +in-flight I/O from run-time changes to the block graph as well as the +clang Thread Safety Analysis annotations that allow the compiler to +check whether the graph lock is being used correctly. + +And me, well, I'm just here to add some pizzazz to the QEMU multi-queue +block layer :). Thank you to everyone who helped with this effort, +including Eric Blake, code reviewer extraordinaire, and others who I've +forgotten to mention. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-11-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + include/block/aio.h | 17 ----------------- + util/async.c | 10 ---------- + 2 files changed, 27 deletions(-) + +diff --git a/include/block/aio.h b/include/block/aio.h +index f08b358077..af05512a7d 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx); + */ + void aio_context_unref(AioContext *ctx); + +-/* Take ownership of the AioContext. If the AioContext will be shared between +- * threads, and a thread does not want to be interrupted, it will have to +- * take ownership around calls to aio_poll(). Otherwise, aio_poll() +- * automatically takes care of calling aio_context_acquire and +- * aio_context_release. +- * +- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A +- * thread still has to call those to avoid being interrupted by the guest. +- * +- * Bottom halves, timers and callbacks can be created or removed without +- * acquiring the AioContext. +- */ +-void aio_context_acquire(AioContext *ctx); +- +-/* Relinquish ownership of the AioContext. */ +-void aio_context_release(AioContext *ctx); +- + /** + * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will + * run only once and as soon as possible. 
+diff --git a/util/async.c b/util/async.c +index dfd44ef612..460529057c 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx) + g_source_unref(&ctx->source); + } + +-void aio_context_acquire(AioContext *ctx) +-{ +- /* TODO remove this function */ +-} +- +-void aio_context_release(AioContext *ctx) +-{ +- /* TODO remove this function */ +-} +- + QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) + + AioContext *qemu_get_current_aio_context(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch b/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch new file mode 100644 index 0000000..7f95b67 --- /dev/null +++ b/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch @@ -0,0 +1,81 @@ +From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:06 -0500 +Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE() + and AIO_WAIT_WHILE_UNLOCKED() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm) + +Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and +AIO_WAIT_WHILE_UNLOCKED() are equivalent. + +A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED(). 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-10-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + include/block/aio-wait.h | 16 ++++------------ + 1 file changed, 4 insertions(+), 12 deletions(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index 5449b6d742..157f105916 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -63,9 +63,6 @@ extern AioWait global_aio_wait; + * @ctx: the aio context, or NULL if multiple aio contexts (for which the + * caller does not hold a lock) are involved in the polling condition. + * @cond: wait while this conditional expression is true +- * @unlock: whether to unlock and then lock again @ctx. This applies +- * only when waiting for another AioContext from the main loop. +- * Otherwise it's ignored. + * + * Wait while a condition is true. Use this to implement synchronous + * operations that require event loop activity. +@@ -78,7 +75,7 @@ extern AioWait global_aio_wait; + * wait on conditions between two IOThreads since that could lead to deadlock, + * go via the main loop instead. 
+ */ +-#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \ ++#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \ + bool waited_ = false; \ + AioWait *wait_ = &global_aio_wait; \ + AioContext *ctx_ = (ctx); \ +@@ -95,13 +92,7 @@ extern AioWait global_aio_wait; + assert(qemu_get_current_aio_context() == \ + qemu_get_aio_context()); \ + while ((cond)) { \ +- if (unlock && ctx_) { \ +- aio_context_release(ctx_); \ +- } \ + aio_poll(qemu_get_aio_context(), true); \ +- if (unlock && ctx_) { \ +- aio_context_acquire(ctx_); \ +- } \ + waited_ = true; \ + } \ + } \ +@@ -109,10 +100,11 @@ extern AioWait global_aio_wait; + waited_; }) + + #define AIO_WAIT_WHILE(ctx, cond) \ +- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true) ++ AIO_WAIT_WHILE_INTERNAL(ctx, cond) + ++/* TODO replace this with AIO_WAIT_WHILE() in a future patch */ + #define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \ +- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false) ++ AIO_WAIT_WHILE_INTERNAL(ctx, cond) + + /** + * aio_wait_kick: +-- +2.39.3 + diff --git a/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch b/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch deleted file mode 100644 index 69505f8..0000000 --- a/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 50795ee051a342c681a9b45671c552fbd6274db8 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:13 2023 -0400 - - apic: disable reentrancy detection for apic-msi - - As the code is designed for 
re-entrant calls to apic-msi, mark apic-msi - as reentrancy-safe. - - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-9-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/intc/apic.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/intc/apic.c b/hw/intc/apic.c -index 20b5a94073..ac3d47d231 100644 ---- a/hw/intc/apic.c -+++ b/hw/intc/apic.c -@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) - memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", - APIC_SPACE_SIZE); - -+ /* -+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can -+ * write back to apic-msi. As such mark the apic-msi region re-entrancy -+ * safe. -+ */ -+ s->io_memory.disable_reentrancy_guard = true; -+ - s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); - local_apics[s->id] = s; - --- -2.39.3 - diff --git a/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch b/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch deleted file mode 100644 index 65ba3be..0000000 --- a/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch +++ /dev/null @@ -1,231 +0,0 @@ -From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 9c86c97f12c060bf7484dd931f38634e166a81f0 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:07 2023 -0400 - - async: Add an optional reentrancy guard to the BH API - - Devices can pass their 
MemoryReentrancyGuard (from their DeviceState), - when creating new BHes. Then, the async API will toggle the guard - before/after calling the BH call-back. This prevents bh->mmio reentrancy - issues. - - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-3-alxndr@bu.edu> - [thuth: Fix "line over 90 characters" checkpatch.pl error] - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - docs/devel/multiple-iothreads.txt | 7 +++++++ - include/block/aio.h | 18 ++++++++++++++++-- - include/qemu/main-loop.h | 7 +++++-- - tests/unit/ptimer-test-stubs.c | 3 ++- - util/async.c | 18 +++++++++++++++++- - util/main-loop.c | 6 ++++-- - util/trace-events | 1 + - 7 files changed, 52 insertions(+), 8 deletions(-) - -diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt -index 343120f2ef..a3e949f6b3 100644 ---- a/docs/devel/multiple-iothreads.txt -+++ b/docs/devel/multiple-iothreads.txt -@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: - * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier - * LEGACY timer_new_ms() - create a timer - * LEGACY qemu_bh_new() - create a BH -+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard - * LEGACY qemu_aio_wait() - run an event loop iteration - - Since they implicitly work on the main loop they cannot be used in code that -@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): - * aio_set_event_notifier() - monitor an event notifier - * aio_timer_new() - create a timer - * aio_bh_new() - create a BH -+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard - * aio_poll() - run an event loop iteration - -+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" -+argument, which is used to check for and prevent re-entrancy problems. 
For -+BHs associated with devices, the reentrancy-guard is contained in the -+corresponding DeviceState and named "mem_reentrancy_guard". -+ - The AioContext can be obtained from the IOThread using - iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). - Code that takes an AioContext argument works both in IOThreads or the main -diff --git a/include/block/aio.h b/include/block/aio.h -index 543717f294..db6f23c619 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -23,6 +23,8 @@ - #include "qemu/thread.h" - #include "qemu/timer.h" - #include "block/graph-lock.h" -+#include "hw/qdev-core.h" -+ - - typedef struct BlockAIOCB BlockAIOCB; - typedef void BlockCompletionFunc(void *opaque, int ret); -@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - * is opaque and must be allocated prior to its use. - * - * @name: A human-readable identifier for debugging purposes. -+ * @reentrancy_guard: A guard set when entering a cb to prevent -+ * device-reentrancy issues - */ - QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, -- const char *name); -+ const char *name, MemReentrancyGuard *reentrancy_guard); - - /** - * aio_bh_new: Allocate a new bottom half structure -@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - * string. - */ - #define aio_bh_new(ctx, cb, opaque) \ -- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) -+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) -+ -+/** -+ * aio_bh_new_guarded: Allocate a new bottom half structure with a -+ * reentrancy_guard -+ * -+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name -+ * string. -+ */ -+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ -+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) - - /** - * aio_notify: Force processing of pending events. 
-diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h -index b3e54e00bc..68e70e61aa 100644 ---- a/include/qemu/main-loop.h -+++ b/include/qemu/main-loop.h -@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); - - /* internal interfaces */ - -+#define qemu_bh_new_guarded(cb, opaque, guard) \ -+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) - #define qemu_bh_new(cb, opaque) \ -- qemu_bh_new_full((cb), (opaque), (stringify(cb))) --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); -+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard); - void qemu_bh_schedule_idle(QEMUBH *bh); - - enum { -diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c -index f2bfcede93..8c9407c560 100644 ---- a/tests/unit/ptimer-test-stubs.c -+++ b/tests/unit/ptimer-test-stubs.c -@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) - return deadline; - } - --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard) - { - QEMUBH *bh = g_new(QEMUBH, 1); - -diff --git a/util/async.c b/util/async.c -index 21016a1ac7..a9b528c370 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -65,6 +65,7 @@ struct QEMUBH { - void *opaque; - QSLIST_ENTRY(QEMUBH) next; - unsigned flags; -+ MemReentrancyGuard *reentrancy_guard; - }; - - /* Called concurrently from any thread */ -@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, - } - - QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, -- const char *name) -+ const char *name, MemReentrancyGuard *reentrancy_guard) - { - QEMUBH *bh; - bh = g_new(QEMUBH, 1); -@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc 
*cb, void *opaque, - .cb = cb, - .opaque = opaque, - .name = name, -+ .reentrancy_guard = reentrancy_guard, - }; - return bh; - } - - void aio_bh_call(QEMUBH *bh) - { -+ bool last_engaged_in_io = false; -+ -+ if (bh->reentrancy_guard) { -+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; -+ if (bh->reentrancy_guard->engaged_in_io) { -+ trace_reentrant_aio(bh->ctx, bh->name); -+ } -+ bh->reentrancy_guard->engaged_in_io = true; -+ } -+ - bh->cb(bh->opaque); -+ -+ if (bh->reentrancy_guard) { -+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; -+ } - } - - /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ -diff --git a/util/main-loop.c b/util/main-loop.c -index e180c85145..7022f02ef8 100644 ---- a/util/main-loop.c -+++ b/util/main-loop.c -@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) - - /* Functions to operate on the main QEMU AioContext. */ - --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard) - { -- return aio_bh_new_full(qemu_aio_context, cb, opaque, name); -+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name, -+ reentrancy_guard); - } - - /* -diff --git a/util/trace-events b/util/trace-events -index 16f78d8fe5..3f7e766683 100644 ---- a/util/trace-events -+++ b/util/trace-events -@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" - # async.c - aio_co_schedule(void *ctx, void *co) "ctx %p co %p" - aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" -+reentrant_aio(void *ctx, const char *name) "ctx %p name %s" - - # thread-pool.c - thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" --- -2.39.3 - diff --git a/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch b/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch deleted file mode 100644 index df71fa2..0000000 --- 
a/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 7915bd06f25e1803778081161bf6fa10c42dc7cd -Author: Alexander Bulekov -Date: Mon May 1 10:19:56 2023 -0400 - - async: avoid use-after-free on re-entrancy guard - - A BH callback can free the BH, causing a use-after-free in aio_bh_call. - Fix that by keeping a local copy of the re-entrancy guard pointer. - - Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513 - Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API") - Signed-off-by: Alexander Bulekov - Message-Id: <20230501141956.3444868-1-alxndr@bu.edu> - Reviewed-by: Thomas Huth - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - util/async.c | 14 ++++++++------ - 1 file changed, 8 insertions(+), 6 deletions(-) - -diff --git a/util/async.c b/util/async.c -index a9b528c370..cd1a1815f9 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh) - { - bool last_engaged_in_io = false; - -- if (bh->reentrancy_guard) { -- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; -- if (bh->reentrancy_guard->engaged_in_io) { -+ /* Make a copy of the guard-pointer as cb may free the bh */ -+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard; -+ if (reentrancy_guard) { -+ last_engaged_in_io = reentrancy_guard->engaged_in_io; -+ if (reentrancy_guard->engaged_in_io) { - trace_reentrant_aio(bh->ctx, bh->name); - } -- 
bh->reentrancy_guard->engaged_in_io = true; -+ reentrancy_guard->engaged_in_io = true; - } - - bh->cb(bh->opaque); - -- if (bh->reentrancy_guard) { -- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; -+ if (reentrancy_guard) { -+ reentrancy_guard->engaged_in_io = last_engaged_in_io; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch b/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch new file mode 100644 index 0000000..898e35b --- /dev/null +++ b/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch @@ -0,0 +1,476 @@ +From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 21 Nov 2023 16:44:00 +0800 +Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm) + +Introduce an iommufd object which allows the interaction +with the host /dev/iommu device. + +The /dev/iommu can have been already pre-opened outside of qemu, +in which case the fd can be passed directly along with the +iommufd object: + +This allows the iommufd object to be shared accross several +subsystems (VFIO, VDPA, ...). For example, libvirt would open +the /dev/iommu once. + +If no fd is passed along with the iommufd object, the /dev/iommu +is opened by the qemu code. 
+ +Suggested-by: Alex Williamson +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be) +Signed-off-by: Eric Auger +--- + MAINTAINERS | 8 ++ + backends/Kconfig | 4 + + backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++ + backends/meson.build | 1 + + backends/trace-events | 10 ++ + include/sysemu/iommufd.h | 38 ++++++ + qapi/qom.json | 19 +++ + qemu-options.hx | 12 ++ + 8 files changed, 337 insertions(+) + create mode 100644 backends/iommufd.c + create mode 100644 include/sysemu/iommufd.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 695e0bd34f..a5a446914a 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c + F: docs/system/s390x/vfio-ap.rst + L: qemu-s390x@nongnu.org + ++iommufd ++M: Yi Liu ++M: Eric Auger ++M: Zhenzhong Duan ++S: Supported ++F: backends/iommufd.c ++F: include/sysemu/iommufd.h ++ + vhost + M: Michael S. Tsirkin + S: Supported +diff --git a/backends/Kconfig b/backends/Kconfig +index f35abc1609..2cb23f62fa 100644 +--- a/backends/Kconfig ++++ b/backends/Kconfig +@@ -1 +1,5 @@ + source tpm/Kconfig ++ ++config IOMMUFD ++ bool ++ depends on VFIO +diff --git a/backends/iommufd.c b/backends/iommufd.c +new file mode 100644 +index 0000000000..ba58a0eb0d +--- /dev/null ++++ b/backends/iommufd.c +@@ -0,0 +1,245 @@ ++/* ++ * iommufd container backend ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "sysemu/iommufd.h" ++#include "qapi/error.h" ++#include "qapi/qmp/qerror.h" ++#include "qemu/module.h" ++#include "qom/object_interfaces.h" ++#include "qemu/error-report.h" ++#include "monitor/monitor.h" ++#include "trace.h" ++#include ++#include ++ ++static void iommufd_backend_init(Object *obj) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ ++ be->fd = -1; ++ be->users = 0; ++ be->owned = true; ++ qemu_mutex_init(&be->lock); ++} ++ ++static void iommufd_backend_finalize(Object *obj) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ ++ if (be->owned) { ++ close(be->fd); ++ be->fd = -1; ++ } ++} ++ ++static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ int fd = -1; ++ ++ fd = monitor_fd_param(monitor_cur(), str, errp); ++ if (fd == -1) { ++ error_prepend(errp, "Could not parse remote object fd %s:", str); ++ return; ++ } ++ qemu_mutex_lock(&be->lock); ++ be->fd = fd; ++ be->owned = false; ++ qemu_mutex_unlock(&be->lock); ++ trace_iommu_backend_set_fd(be->fd); ++} ++ ++static bool iommufd_backend_can_be_deleted(UserCreatable *uc) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); ++ ++ return !be->users; ++} ++ ++static void iommufd_backend_class_init(ObjectClass *oc, void *data) ++{ ++ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); ++ ++ ucc->can_be_deleted = iommufd_backend_can_be_deleted; ++ ++ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); ++} ++ ++int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) ++{ ++ int fd, ret = 0; ++ ++ qemu_mutex_lock(&be->lock); ++ if (be->users == UINT32_MAX) { ++ error_setg(errp, "too many connections"); ++ ret = -E2BIG; ++ goto out; ++ } ++ if (be->owned && !be->users) { ++ fd = qemu_open_old("/dev/iommu", O_RDWR); ++ if (fd < 0) { ++ error_setg_errno(errp, errno, 
"/dev/iommu opening failed"); ++ ret = fd; ++ goto out; ++ } ++ be->fd = fd; ++ } ++ be->users++; ++out: ++ trace_iommufd_backend_connect(be->fd, be->owned, ++ be->users, ret); ++ qemu_mutex_unlock(&be->lock); ++ return ret; ++} ++ ++void iommufd_backend_disconnect(IOMMUFDBackend *be) ++{ ++ qemu_mutex_lock(&be->lock); ++ if (!be->users) { ++ goto out; ++ } ++ be->users--; ++ if (!be->users && be->owned) { ++ close(be->fd); ++ be->fd = -1; ++ } ++out: ++ trace_iommufd_backend_disconnect(be->fd, be->users); ++ qemu_mutex_unlock(&be->lock); ++} ++ ++int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_alloc alloc_data = { ++ .size = sizeof(alloc_data), ++ .flags = 0, ++ }; ++ ++ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); ++ if (ret) { ++ error_setg_errno(errp, errno, "Failed to allocate ioas"); ++ return ret; ++ } ++ ++ *ioas_id = alloc_data.out_ioas_id; ++ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); ++ ++ return ret; ++} ++ ++void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) ++{ ++ int ret, fd = be->fd; ++ struct iommu_destroy des = { ++ .size = sizeof(des), ++ .id = id, ++ }; ++ ++ ret = ioctl(fd, IOMMU_DESTROY, &des); ++ trace_iommufd_backend_free_id(fd, id, ret); ++ if (ret) { ++ error_report("Failed to free id: %u %m", id); ++ } ++} ++ ++int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_map map = { ++ .size = sizeof(map), ++ .flags = IOMMU_IOAS_MAP_READABLE | ++ IOMMU_IOAS_MAP_FIXED_IOVA, ++ .ioas_id = ioas_id, ++ .__reserved = 0, ++ .user_va = (uintptr_t)vaddr, ++ .iova = iova, ++ .length = size, ++ }; ++ ++ if (!readonly) { ++ map.flags |= IOMMU_IOAS_MAP_WRITEABLE; ++ } ++ ++ ret = ioctl(fd, IOMMU_IOAS_MAP, &map); ++ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, ++ vaddr, readonly, ret); ++ if (ret) { ++ ret = -errno; 
++ ++ /* TODO: Not support mapping hardware PCI BAR region for now. */ ++ if (errno == EFAULT) { ++ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); ++ } else { ++ error_report("IOMMU_IOAS_MAP failed: %m"); ++ } ++ } ++ return ret; ++} ++ ++int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, ++ hwaddr iova, ram_addr_t size) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_unmap unmap = { ++ .size = sizeof(unmap), ++ .ioas_id = ioas_id, ++ .iova = iova, ++ .length = size, ++ }; ++ ++ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); ++ /* ++ * IOMMUFD takes mapping as some kind of object, unmapping ++ * nonexistent mapping is treated as deleting a nonexistent ++ * object and return ENOENT. This is different from legacy ++ * backend which allows it. vIOMMU may trigger a lot of ++ * redundant unmapping, to avoid flush the log, treat them ++ * as succeess for IOMMUFD just like legacy backend. ++ */ ++ if (ret && errno == ENOENT) { ++ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret); ++ ret = 0; ++ } else { ++ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); ++ } ++ ++ if (ret) { ++ ret = -errno; ++ error_report("IOMMU_IOAS_UNMAP failed: %m"); ++ } ++ return ret; ++} ++ ++static const TypeInfo iommufd_backend_info = { ++ .name = TYPE_IOMMUFD_BACKEND, ++ .parent = TYPE_OBJECT, ++ .instance_size = sizeof(IOMMUFDBackend), ++ .instance_init = iommufd_backend_init, ++ .instance_finalize = iommufd_backend_finalize, ++ .class_size = sizeof(IOMMUFDBackendClass), ++ .class_init = iommufd_backend_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static void register_types(void) ++{ ++ type_register_static(&iommufd_backend_info); ++} ++ ++type_init(register_types); +diff --git a/backends/meson.build b/backends/meson.build +index 914c7c4afb..9a5cea480d 100644 +--- a/backends/meson.build ++++ b/backends/meson.build +@@ -20,6 +20,7 @@ if have_vhost_user + system_ss.add(when: 
'CONFIG_VIRTIO', if_true: files('vhost-user.c')) + endif + system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) ++system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c')) + if have_vhost_user_crypto + system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) + endif +diff --git a/backends/trace-events b/backends/trace-events +index 652eb76a57..d45c6e31a6 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void) + dbus_vmstate_post_load(int version_id) "version_id: %d" + dbus_vmstate_loading(const char *id) "id: %s" + dbus_vmstate_saving(const char *id) "id: %s" ++ ++# iommufd.c ++iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" ++iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" ++iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" ++iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" ++iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" ++iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" ++iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" ++iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +new file mode 100644 +index 0000000000..9c5524b0ed +--- /dev/null ++++ b/include/sysemu/iommufd.h +@@ -0,0 +1,38 @@ ++#ifndef SYSEMU_IOMMUFD_H ++#define SYSEMU_IOMMUFD_H ++ ++#include "qom/object.h" ++#include "qemu/thread.h" ++#include "exec/hwaddr.h" ++#include 
"exec/cpu-common.h" ++ ++#define TYPE_IOMMUFD_BACKEND "iommufd" ++OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) ++ ++struct IOMMUFDBackendClass { ++ ObjectClass parent_class; ++}; ++ ++struct IOMMUFDBackend { ++ Object parent; ++ ++ /*< protected >*/ ++ int fd; /* /dev/iommu file descriptor */ ++ bool owned; /* is the /dev/iommu opened internally */ ++ QemuMutex lock; ++ uint32_t users; ++ ++ /*< public >*/ ++}; ++ ++int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); ++void iommufd_backend_disconnect(IOMMUFDBackend *be); ++ ++int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp); ++void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); ++int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly); ++int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, ++ hwaddr iova, ram_addr_t size); ++#endif +diff --git a/qapi/qom.json b/qapi/qom.json +index c53ef978ff..95516ba325 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -794,6 +794,23 @@ + { 'struct': 'VfioUserServerProperties', + 'data': { 'socket': 'SocketAddress', 'device': 'str' } } + ++## ++# @IOMMUFDProperties: ++# ++# Properties for iommufd objects. ++# ++# @fd: file descriptor name previously passed via 'getfd' command, ++# which represents a pre-opened /dev/iommu. This allows the ++# iommufd object to be shared accross several subsystems ++# (VFIO, VDPA, ...), and the file descriptor to be shared ++# with other process, e.g. DPDK. 
(default: QEMU opens ++# /dev/iommu by itself) ++# ++# Since: 9.0 ++## ++{ 'struct': 'IOMMUFDProperties', ++ 'data': { '*fd': 'str' } } ++ + ## + # @RngProperties: + # +@@ -934,6 +951,7 @@ + 'input-barrier', + { 'name': 'input-linux', + 'if': 'CONFIG_LINUX' }, ++ 'iommufd', + 'iothread', + 'main-loop', + { 'name': 'memory-backend-epc', +@@ -1003,6 +1021,7 @@ + 'input-barrier': 'InputBarrierProperties', + 'input-linux': { 'type': 'InputLinuxProperties', + 'if': 'CONFIG_LINUX' }, ++ 'iommufd': 'IOMMUFDProperties', + 'iothread': 'IothreadProperties', + 'main-loop': 'MainLoopProperties', + 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', +diff --git a/qemu-options.hx b/qemu-options.hx +index 557118cb1f..0814f43066 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5224,6 +5224,18 @@ SRST + + The ``share`` boolean option is on by default with memfd. + ++ ``-object iommufd,id=id[,fd=fd]`` ++ Creates an iommufd backend which allows control of DMA mapping ++ through the ``/dev/iommu`` device. ++ ++ The ``id`` parameter is a unique ID which frontends (such as ++ vfio-pci of vdpa) will use to connect with the iommufd backend. ++ ++ The ``fd`` parameter is an optional pre-opened file descriptor ++ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared ++ across all subsystems, bringing the benefit of centralized ++ reference counting. ++ + ``-object rng-builtin,id=id`` + Creates a random number generator backend which obtains entropy + from QEMU builtin functions. 
The ``id`` parameter is a unique ID +-- +2.39.3 + diff --git a/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch b/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch new file mode 100644 index 0000000..5ee365b --- /dev/null +++ b/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch @@ -0,0 +1,47 @@ +From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Fri, 22 Dec 2023 08:55:23 +0100 +Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend + users +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm) + +QOM already has a ref count on objects and it will assert much +earlier, when INT_MAX is reached. 
+ +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0) +Signed-off-by: Eric Auger +--- + backends/iommufd.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index ba58a0eb0d..393c0d9a37 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + int fd, ret = 0; + + qemu_mutex_lock(&be->lock); +- if (be->users == UINT32_MAX) { +- error_setg(errp, "too many connections"); +- ret = -E2BIG; +- goto out; +- } + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { +-- +2.39.3 + diff --git a/SOURCES/kvm-backends-iommufd-Remove-mutex.patch b/SOURCES/kvm-backends-iommufd-Remove-mutex.patch new file mode 100644 index 0000000..83878d5 --- /dev/null +++ b/SOURCES/kvm-backends-iommufd-Remove-mutex.patch @@ -0,0 +1,112 @@ +From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Thu, 21 Dec 2023 16:58:41 +0100 +Subject: [PATCH 065/101] backends/iommufd: Remove mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm) + +Coverity reports a concurrent data access violation because be->users +is being accessed in iommufd_backend_can_be_deleted() without holding +the mutex. + +However, these routines are called from the QEMU main thread when a +device is created. In this case, the code paths should be protected by +the BQL lock and it should be safe to drop the IOMMUFD backend mutex. +Simply remove it. 
+ +Fixes: CID 1531550 +Fixes: CID 1531549 +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451) +Signed-off-by: Eric Auger +--- + backends/iommufd.c | 7 ------- + include/sysemu/iommufd.h | 2 -- + 2 files changed, 9 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 393c0d9a37..1ef683c7b0 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj) + be->fd = -1; + be->users = 0; + be->owned = true; +- qemu_mutex_init(&be->lock); + } + + static void iommufd_backend_finalize(Object *obj) +@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) + error_prepend(errp, "Could not parse remote object fd %s:", str); + return; + } +- qemu_mutex_lock(&be->lock); + be->fd = fd; + be->owned = false; +- qemu_mutex_unlock(&be->lock); + trace_iommu_backend_set_fd(be->fd); + } + +@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + { + int fd, ret = 0; + +- qemu_mutex_lock(&be->lock); + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { +@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + out: + trace_iommufd_backend_connect(be->fd, be->owned, + be->users, ret); +- qemu_mutex_unlock(&be->lock); + return ret; + } + + void iommufd_backend_disconnect(IOMMUFDBackend *be) + { +- qemu_mutex_lock(&be->lock); + if (!be->users) { + goto out; + } +@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) + } + out: + trace_iommufd_backend_disconnect(be->fd, be->users); +- qemu_mutex_unlock(&be->lock); + } + + int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 9c5524b0ed..9af27ebd6c 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ 
-2,7 +2,6 @@ + #define SYSEMU_IOMMUFD_H + + #include "qom/object.h" +-#include "qemu/thread.h" + #include "exec/hwaddr.h" + #include "exec/cpu-common.h" + +@@ -19,7 +18,6 @@ struct IOMMUFDBackend { + /*< protected >*/ + int fd; /* /dev/iommu file descriptor */ + bool owned; /* is the /dev/iommu opened internally */ +- QemuMutex lock; + uint32_t users; + + /*< public >*/ +-- +2.39.3 + diff --git a/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch b/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch deleted file mode 100644 index 6d9abb8..0000000 --- a/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for - iomem - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:11 2023 -0400 - - bcm2835_property: disable reentrancy detection for iomem - - As the code is designed for re-entrant calls from bcm2835_property to - bcm2835_mbox and back into bcm2835_property, mark iomem as - reentrancy-safe. 
- - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-7-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/misc/bcm2835_property.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c -index 890ae7bae5..de056ea2df 100644 ---- a/hw/misc/bcm2835_property.c -+++ b/hw/misc/bcm2835_property.c -@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) - - memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, - TYPE_BCM2835_PROPERTY, 0x10); -+ -+ /* -+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from -+ * iomem. As such, mark iomem as re-entracy safe. -+ */ -+ s->iomem.disable_reentrancy_guard = true; -+ - sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); - sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch b/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch deleted file mode 100644 index 6de5d65..0000000 --- a/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch +++ /dev/null @@ -1,354 +0,0 @@ -From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:16 +0200 -Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s) - -When processing vectored guest requests that are not aligned to the -storage request alignment, we pad them by adding head and/or tail -buffers for a read-modify-write cycle. - -The guest can submit I/O vectors up to IOV_MAX (1024) in length, but -with this padding, the vector can exceed that limit. 
As of -4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make -qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the -limit, instead returning an error to the guest. - -To the guest, this appears as a random I/O error. We should not return -an I/O error to the guest when it issued a perfectly valid request. - -Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector -longer than IOV_MAX, which generally seems to work (because the guest -assumes a smaller alignment than we really have, file-posix's -raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and -so emulate the request, so that the IOV_MAX does not matter). However, -that does not seem exactly great. - -I see two ways to fix this problem: -1. We split such long requests into two requests. -2. We join some elements of the vector into new buffers to make it - shorter. - -I am wary of (1), because it seems like it may have unintended side -effects. - -(2) on the other hand seems relatively simple to implement, with -hopefully few side effects, so this patch does that. - -To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request() -is effectively replaced by the new function bdrv_create_padded_qiov(), -which not only wraps the request IOV with padding head/tail, but also -ensures that the resulting vector will not have more than IOV_MAX -elements. Putting that functionality into qemu_iovec_init_extended() is -infeasible because it requires allocating a bounce buffer; doing so -would require many more parameters (buffer alignment, how to initialize -the buffer, and out parameters like the buffer, its length, and the -original elements), which is not reasonable. - -Conversely, it is not difficult to move qemu_iovec_init_extended()'s -functionality into bdrv_create_padded_qiov() by using public -qemu_iovec_* functions, so that is what this patch does. 
- -Because bdrv_pad_request() was the only "serious" user of -qemu_iovec_init_extended(), the next patch will remove the latter -function, so the functionality is not implemented twice. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964 -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-3-hreitz@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a) -Signed-off-by: Hanna Czenczek ---- - block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 151 insertions(+), 15 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 2e267a85ab..4e8e90208b 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1439,6 +1439,14 @@ out: - * @merge_reads is true for small requests, - * if @buf_len == @head + bytes + @tail. In this case it is possible that both - * head and tail exist but @buf_len == align and @tail_buf == @buf. -+ * -+ * @write is true for write requests, false for read requests. -+ * -+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to -+ * merge existing vector elements into a single one. @collapse_bounce_buf acts -+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse -+ * I/O vector elements so for read requests, the data can be copied back after -+ * the read is done. 
- */ - typedef struct BdrvRequestPadding { - uint8_t *buf; -@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding { - size_t head; - size_t tail; - bool merge_reads; -+ bool write; - QEMUIOVector local_qiov; -+ -+ uint8_t *collapse_bounce_buf; -+ size_t collapse_len; -+ QEMUIOVector pre_collapse_qiov; - } BdrvRequestPadding; - - static bool bdrv_init_padding(BlockDriverState *bs, - int64_t offset, int64_t bytes, -+ bool write, - BdrvRequestPadding *pad) - { - int64_t align = bs->bl.request_alignment; -@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs, - pad->tail_buf = pad->buf + pad->buf_len - align; - } - -+ pad->write = write; -+ - return true; - } - -@@ -1547,8 +1563,23 @@ zero_mem: - return 0; - } - --static void bdrv_padding_destroy(BdrvRequestPadding *pad) -+/** -+ * Free *pad's associated buffers, and perform any necessary finalization steps. -+ */ -+static void bdrv_padding_finalize(BdrvRequestPadding *pad) - { -+ if (pad->collapse_bounce_buf) { -+ if (!pad->write) { -+ /* -+ * If padding required elements in the vector to be collapsed into a -+ * bounce buffer, copy the bounce buffer content back -+ */ -+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ qemu_vfree(pad->collapse_bounce_buf); -+ qemu_iovec_destroy(&pad->pre_collapse_qiov); -+ } - if (pad->buf) { - qemu_vfree(pad->buf); - qemu_iovec_destroy(&pad->local_qiov); -@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - memset(pad, 0, sizeof(*pad)); - } - -+/* -+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while -+ * ensuring that the resulting vector will not exceed IOV_MAX elements. -+ * -+ * To ensure this, when necessary, the first two or three elements of @iov are -+ * merged into pad->collapse_bounce_buf and replaced by a reference to that -+ * bounce buffer in pad->local_qiov. 
-+ * -+ * After performing a read request, the data from the bounce buffer must be -+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()). -+ */ -+static int bdrv_create_padded_qiov(BlockDriverState *bs, -+ BdrvRequestPadding *pad, -+ struct iovec *iov, int niov, -+ size_t iov_offset, size_t bytes) -+{ -+ int padded_niov, surplus_count, collapse_count; -+ -+ /* Assert this invariant */ -+ assert(niov <= IOV_MAX); -+ -+ /* -+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error -+ * to the guest is not ideal, but there is little else we can do. At least -+ * this will practically never happen on 64-bit systems. -+ */ -+ if (SIZE_MAX - pad->head < bytes || -+ SIZE_MAX - pad->head - bytes < pad->tail) -+ { -+ return -EINVAL; -+ } -+ -+ /* Length of the resulting IOV if we just concatenated everything */ -+ padded_niov = !!pad->head + niov + !!pad->tail; -+ -+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX)); -+ -+ if (pad->head) { -+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head); -+ } -+ -+ /* -+ * If padded_niov > IOV_MAX, we cannot just concatenate everything. -+ * Instead, merge the first two or three elements of @iov to reduce the -+ * number of vector elements as necessary. -+ */ -+ if (padded_niov > IOV_MAX) { -+ /* -+ * Only head and tail can have lead to the number of entries exceeding -+ * IOV_MAX, so we can exceed it by the head and tail at most. We need -+ * to reduce the number of elements by `surplus_count`, so we merge that -+ * many elements plus one into one element. -+ */ -+ surplus_count = padded_niov - IOV_MAX; -+ assert(surplus_count <= !!pad->head + !!pad->tail); -+ collapse_count = surplus_count + 1; -+ -+ /* -+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then -+ * advance `iov` (and associated variables) by those elements. 
-+ */ -+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count); -+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov, -+ collapse_count, iov_offset, SIZE_MAX); -+ iov += collapse_count; -+ iov_offset = 0; -+ niov -= collapse_count; -+ bytes -= pad->pre_collapse_qiov.size; -+ -+ /* -+ * Construct the bounce buffer to match the length of the to-collapse -+ * vector elements, and for write requests, initialize it with the data -+ * from those elements. Then add it to `pad->local_qiov`. -+ */ -+ pad->collapse_len = pad->pre_collapse_qiov.size; -+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len); -+ if (pad->write) { -+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ qemu_iovec_add(&pad->local_qiov, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ -+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes); -+ -+ if (pad->tail) { -+ qemu_iovec_add(&pad->local_qiov, -+ pad->buf + pad->buf_len - pad->tail, pad->tail); -+ } -+ -+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX)); -+ return 0; -+} -+ - /* - * bdrv_pad_request - * -@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - * read of padding, bdrv_padding_rmw_read() should be called separately if - * needed. - * -+ * @write is true for write requests, false for read requests. 
-+ * - * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out: - * - on function start they represent original request - * - on failure or when padding is not needed they are unchanged -@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - static int bdrv_pad_request(BlockDriverState *bs, - QEMUIOVector **qiov, size_t *qiov_offset, - int64_t *offset, int64_t *bytes, -+ bool write, - BdrvRequestPadding *pad, bool *padded, - BdrvRequestFlags *flags) - { - int ret; -+ struct iovec *sliced_iov; -+ int sliced_niov; -+ size_t sliced_head, sliced_tail; - - bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); - -- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { -+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { - if (padded) { - *padded = false; - } - return 0; - } - -- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, -- *qiov, *qiov_offset, *bytes, -- pad->buf + pad->buf_len - pad->tail, -- pad->tail); -+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, -+ &sliced_head, &sliced_tail, -+ &sliced_niov); -+ -+ /* Guaranteed by bdrv_check_qiov_request() */ -+ assert(*bytes <= SIZE_MAX); -+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, -+ sliced_head, *bytes); - if (ret < 0) { -- bdrv_padding_destroy(pad); -+ bdrv_padding_finalize(pad); - return ret; - } - *bytes += pad->head + pad->tail; -@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - flags |= BDRV_REQ_COPY_ON_READ; - } - -- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, -- NULL, &flags); -+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false, -+ &pad, NULL, &flags); - if (ret < 0) { - goto fail; - } -@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - bs->bl.request_alignment, - qiov, qiov_offset, flags); - tracked_request_end(&req); -- bdrv_padding_destroy(&pad); -+ 
bdrv_padding_finalize(&pad); - - fail: - bdrv_dec_in_flight(bs); -@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, - /* This flag doesn't make sense for padding or zero writes */ - flags &= ~BDRV_REQ_REGISTERED_BUF; - -- padding = bdrv_init_padding(bs, offset, bytes, &pad); -+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad); - if (padding) { - assert(!(flags & BDRV_REQ_NO_WAIT)); - bdrv_make_request_serialising(req, align); -@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, - } - - out: -- bdrv_padding_destroy(&pad); -+ bdrv_padding_finalize(&pad); - - return ret; - } -@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, - * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do - * alignment only if there is no ZERO flag. - */ -- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, -- &padded, &flags); -+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true, -+ &pad, &padded, &flags); - if (ret < 0) { - return ret; - } -@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, - ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, - qiov, qiov_offset, flags); - -- bdrv_padding_destroy(&pad); -+ bdrv_padding_finalize(&pad); - - out: - tracked_request_end(&req); --- -2.39.3 - diff --git a/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch b/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch deleted file mode 100644 index fbab82d..0000000 --- a/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch +++ /dev/null @@ -1,56 +0,0 @@ -From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 4 May 2023 13:57:34 +0200 -Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in - qmp_block_resize() - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in 
qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm) - -This QMP handler runs in a coroutine, so it must use the corresponding -no_co_wrappers instead. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688 -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-Id: <20230504115750.54437-5-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888) -Signed-off-by: Kevin Wolf ---- - blockdev.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index d7b5c18f0a..eb509cf964 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, - return; - } - -- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); -+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); - if (!blk) { - return; - } -@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, - - bdrv_co_lock(bs); - bdrv_drained_end(bs); -- blk_unref(blk); -+ blk_co_unref(blk); - bdrv_co_unlock(bs); - } - --- -2.39.1 - diff --git a/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch b/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch deleted file mode 100644 index c0ab8c2..0000000 --- a/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 14 Jul 2023 10:59:38 +0200 -Subject: [PATCH 5/9] block: Fix pad_request's request restriction - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX 
-RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s) - -bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX, -which bdrv_check_qiov_request() does not guarantee. - -bdrv_check_request32() however will guarantee this, and both of -bdrv_pad_request()'s callers (bdrv_co_preadv_part() and -bdrv_co_pwritev_part()) already run it before calling -bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call -bdrv_check_request32() without expecting error, too. - -In effect, this patch will not change guest-visible behavior. It is a -clean-up to tighten a condition to match what is guaranteed by our -callers, and which exists purely to show clearly why the subsequent -assertion (`assert(*bytes <= SIZE_MAX)`) is always true. - -Note there is a difference between the interfaces of -bdrv_check_qiov_request() and bdrv_check_request32(): The former takes -an errp, the latter does not, so we can no longer just pass -&error_abort. Instead, we need to check the returned value. While we -do expect success (because the callers have already run this function), -an assert(ret == 0) is not much simpler than just to return an error if -it occurs, so let us handle errors by returning them up the stack now. 
- -Reported-by: Peter Maydell -Signed-off-by: Hanna Czenczek -Message-id: 20230714085938.202730-1-hreitz@redhat.com -Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a - ("block: Collapse padded I/O vecs exceeding IOV_MAX") -Signed-off-by: Hanna Czenczek -Signed-off-by: Stefan Hajnoczi ---- - block/io.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 4e8e90208b..807c9fb720 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs, - int sliced_niov; - size_t sliced_head, sliced_tail; - -- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); -+ /* Should have been checked by the caller already */ -+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset); -+ if (ret < 0) { -+ return ret; -+ } - - if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { - if (padded) { -@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs, - &sliced_head, &sliced_tail, - &sliced_niov); - -- /* Guaranteed by bdrv_check_qiov_request() */ -+ /* Guaranteed by bdrv_check_request32() */ - assert(*bytes <= SIZE_MAX); - ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, - sliced_head, *bytes); --- -2.39.3 - diff --git a/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch b/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch new file mode 100644 index 0000000..155fa19 --- /dev/null +++ b/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch @@ -0,0 +1,104 @@ +From afa842e9fdf6e1d6e5d5785679a22779632142bd Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 15:47:54 +0100 +Subject: [PATCH 03/22] block-backend: Allow concurrent context changes + +RH-Author: Hanna Czenczek +RH-MergeRequest: 222: Allow concurrent BlockBackend context changes +RH-Jira: RHEL-24593 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [1/2] 
9e1b535f60f7afa94a0817dc3e71136e41631c71 (hreitz/qemu-kvm-c-9-s) + +Since AioContext locks have been removed, a BlockBackend's AioContext +may really change at any time (only exception is that it is often +confined to a drained section, as noted in this patch). Therefore, +blk_get_aio_context() cannot rely on its root node's context always +matching that of the BlockBackend. + +In practice, whether they match does not matter anymore anyway: Requests +can be sent to BDSs from any context, so anyone who requests the BB's +context should have no reason to require the root node to have the same +context. Therefore, we can and should remove the assertion to that +effect. + +In addition, because the context can be set and queried from different +threads concurrently, it has to be accessed with atomic operations. + +Buglink: https://issues.redhat.com/browse/RHEL-19381 +Suggested-by: Kevin Wolf +Signed-off-by: Hanna Czenczek +Message-ID: <20240202144755.671354-2-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit ad893672027ffe26db498947d70cde6d4f58a111) +--- + block/block-backend.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 209eb07528..9c4de79e6b 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -44,7 +44,7 @@ struct BlockBackend { + char *name; + int refcnt; + BdrvChild *root; +- AioContext *ctx; ++ AioContext *ctx; /* access with atomic operations only */ + DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ + QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ + QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ +@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) + } + } + ++/** ++ * Return BB's current AioContext. 
Note that this context may change ++ * concurrently at any time, with one exception: If the BB has a root node ++ * attached, its context will only change through bdrv_try_change_aio_context(), ++ * which creates a drained section. Therefore, incrementing such a BB's ++ * in-flight counter will prevent its context from changing. ++ */ + AioContext *blk_get_aio_context(BlockBackend *blk) + { +- BlockDriverState *bs; + IO_CODE(); + + if (!blk) { + return qemu_get_aio_context(); + } + +- bs = blk_bs(blk); +- if (bs) { +- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); +- assert(ctx == blk->ctx); +- } +- +- return blk->ctx; ++ return qatomic_read(&blk->ctx); + } + + int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, +@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + GLOBAL_STATE_CODE(); + + if (!bs) { +- blk->ctx = new_context; ++ qatomic_set(&blk->ctx, new_context); + return 0; + } + +@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque) + AioContext *new_context = s->new_ctx; + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + +- blk->ctx = new_context; ++ qatomic_set(&blk->ctx, new_context); + if (tgm->throttle_state) { + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, new_context); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch b/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch deleted file mode 100644 index 0f0347b..0000000 --- a/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch +++ /dev/null @@ -1,386 +0,0 @@ -From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 4 May 2023 13:57:33 +0200 -Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine - context - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: 
Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm) - -These functions must not be called in coroutine context, because they -need write access to the graph. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-Id: <20230504115750.54437-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786) -Signed-off-by: Kevin Wolf ---- - block.c | 2 +- - block/crypto.c | 6 +++--- - block/parallels.c | 6 +++--- - block/qcow.c | 6 +++--- - block/qcow2.c | 14 +++++++------- - block/qed.c | 6 +++--- - block/vdi.c | 6 +++--- - block/vhdx.c | 6 +++--- - block/vmdk.c | 18 +++++++++--------- - block/vpc.c | 6 +++--- - include/block/block-global-state.h | 3 ++- - include/sysemu/block-backend-global-state.h | 5 ++++- - 12 files changed, 44 insertions(+), 40 deletions(-) - -diff --git a/block.c b/block.c -index d79a52ca74..a48112f945 100644 ---- a/block.c -+++ b/block.c -@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, - - ret = 0; - out: -- blk_unref(blk); -+ blk_co_unref(blk); - return ret; - } - -diff --git a/block/crypto.c b/block/crypto.c -index ca67289187..8fd3ad0054 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, - ret = 0; - cleanup: - qcrypto_block_free(crypto); -- blk_unref(blk); -+ blk_co_unref(blk); - return ret; - } - -@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) - - ret = 0; - fail: -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -730,7 +730,7 @@ fail: - bdrv_co_delete_file_noerr(bs); - } - -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_QCryptoBlockCreateOptions(create_opts); - qobject_unref(cryptoopts); - 
return ret; -diff --git a/block/parallels.c b/block/parallels.c -index 013684801a..b49c35929e 100644 ---- a/block/parallels.c -+++ b/block/parallels.c -@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, - - ret = 0; - out: -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - - exit: -@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename, - - done: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qcow.c b/block/qcow.c -index 490e4f819e..a0c701f578 100644 ---- a/block/qcow.c -+++ b/block/qcow.c -@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, - g_free(tmp); - ret = 0; - exit: -- blk_unref(qcow_blk); -- bdrv_unref(bs); -+ blk_co_unref(qcow_blk); -+ bdrv_co_unref(bs); - qcrypto_block_free(crypto); - return ret; - } -@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename, - fail: - g_free(backing_fmt); - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qcow2.c b/block/qcow2.c -index 22084730f9..0b8beb8b47 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - goto out; - } - -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - - /* -@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - } - } - -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - - /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 
-@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - - ret = 0; - out: -- blk_unref(blk); -- bdrv_unref(bs); -- bdrv_unref(data_bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); -+ bdrv_co_unref(data_bs); - return ret; - } - -@@ -3949,8 +3949,8 @@ finish: - } - - qobject_unref(qdict); -- bdrv_unref(bs); -- bdrv_unref(data_bs); -+ bdrv_co_unref(bs); -+ bdrv_co_unref(data_bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qed.c b/block/qed.c -index 0705a7b4e2..aff2a2076e 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, - ret = 0; /* success */ - out: - g_free(l1_table); -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/vdi.c b/block/vdi.c -index f2434d6153..08331d2dd7 100644 ---- a/block/vdi.c -+++ b/block/vdi.c -@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, - - ret = 0; - exit: -- blk_unref(blk); -- bdrv_unref(bs_file); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs_file); - g_free(bmap); - return ret; - } -@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename, - done: - qobject_unref(qdict); - qapi_free_BlockdevCreateOptions(create_options); -- bdrv_unref(bs_file); -+ bdrv_co_unref(bs_file); - return ret; - } - -diff --git a/block/vhdx.c b/block/vhdx.c -index 81420722a1..00777da91a 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, - - ret = 0; - delete_and_exit: -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); 
- g_free(creator); - return ret; - } -@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/vmdk.c b/block/vmdk.c -index f5f49018fe..01ca13c82b 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2306,7 +2306,7 @@ exit: - if (pbb) { - *pbb = blk; - } else { -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - } - } -@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size, - if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { - error_setg(errp, "Invalid backing file format: %s. Must be vmdk", - blk_bs(backing)->drv->format_name); -- blk_unref(backing); -+ blk_co_unref(backing); - ret = -EINVAL; - goto exit; - } - ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); -- blk_unref(backing); -+ blk_co_unref(backing); - if (ret) { - error_setg(errp, "Failed to read parent CID"); - goto exit; -@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size, - blk_bs(extent_blk)->filename); - created_size += cur_size; - extent_idx++; -- blk_unref(extent_blk); -+ blk_co_unref(extent_blk); - } - - /* Check whether we got excess extents */ - extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, - opaque, NULL); - if (extent_blk) { -- blk_unref(extent_blk); -+ blk_co_unref(extent_blk); - error_setg(errp, "List of extents contains unused extents"); - ret = -EINVAL; - goto exit; -@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size, - ret = 0; - exit: - if (blk) { -- blk_unref(blk); -+ blk_co_unref(blk); - } - g_free(desc); - g_free(parent_desc_line); -@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, - errp)) { - goto exit; - } -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - exit: - g_free(ext_filename); - return blk; -@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, - return NULL; 
- } - blk_set_allow_write_beyond_eof(blk, true); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - - if (size != -1) { - ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); - if (ret) { -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - } - } -diff --git a/block/vpc.c b/block/vpc.c -index b89b0ff8e2..07ddda5b99 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, - } - - out: -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 399200a9a3..cd4ea554bf 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt, - bool quiet, Error **errp); - - void bdrv_ref(BlockDriverState *bs); --void bdrv_unref(BlockDriverState *bs); -+void no_coroutine_fn bdrv_unref(BlockDriverState *bs); -+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs); - void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); - BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, -diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h -index 2b6d27db7c..fa83f9389c 100644 ---- a/include/sysemu/block-backend-global-state.h -+++ b/include/sysemu/block-backend-global-state.h -@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options, - - int blk_get_refcnt(BlockBackend *blk); - void blk_ref(BlockBackend *blk); --void blk_unref(BlockBackend *blk); -+ -+void no_coroutine_fn blk_unref(BlockBackend 
*blk); -+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); -+ - void blk_remove_all_bs(void); - BlockBackend *blk_by_name(const char *name); - BlockBackend *blk_next(BlockBackend *blk); --- -2.39.1 - diff --git a/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch b/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch deleted file mode 100644 index caf6694..0000000 --- a/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch +++ /dev/null @@ -1,74 +0,0 @@ -From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 26 Jul 2023 09:48:07 +0200 -Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s) - -qemu_open() in blkio_virtio_blk_common_open() is used to open the -character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in -the future eventually the unix socket. - -In all these cases we cannot open the path in read-only mode, -when the `read-only` option of blockdev is on, because the exchange -of IOCTL commands for example will fail. - -In order to open the device read-only, we have to use the `read-only` -property of the libblkio driver as we already do in blkio_file_open(). - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439 -Reported-by: Qing Wang -Signed-off-by: Stefano Garzarella -Reviewed-by: Daniel P. 
Berrangé -Message-id: 20230726074807.14041-1-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 3ea9841bd8..5a82c6cb1a 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - * layer through the "/dev/fdset/N" special path. - */ - if (fd_supported) { -- int open_flags; -- -- if (flags & BDRV_O_RDWR) { -- open_flags = O_RDWR; -- } else { -- open_flags = O_RDONLY; -- } -- -- fd = qemu_open(path, open_flags, errp); -+ /* -+ * `path` can contain the path of a character device -+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket. -+ * -+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR -+ * is not set in the open flags, because the exchange of IOCTL commands -+ * for example will fail. -+ * -+ * In order to open the device read-only, we are using the `read-only` -+ * property of the libblkio driver in blkio_file_open(). 
-+ */ -+ fd = qemu_open(path, O_RDWR, errp); - if (fd < 0) { - return -EINVAL; - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch b/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch deleted file mode 100644 index 8a6f72b..0000000 --- a/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch +++ /dev/null @@ -1,54 +0,0 @@ -From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 25 Jul 2023 12:37:44 +0200 -Subject: [PATCH 01/14] block/blkio: enable the completion eventfd - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s) - -Until libblkio 1.3.0, virtio-blk drivers had completion eventfd -notifications enabled from the start, but from the next releases -this is no longer the case, so we have to explicitly enable them. - -In fact, the libblkio documentation says they could be disabled, -so we should always enable them at the start if we want to be -sure to get completion eventfd notifications: - - By default, the driver might not generate completion events for - requests so it is necessary to explicitly enable the completion - file descriptor before use: - - void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable); - -I discovered this while trying a development version of libblkio: -the guest kernel hangs during boot, while probing the device. 
- -Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") -Signed-off-by: Stefano Garzarella -Message-id: 20230725103744.77343-1-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block/blkio.c b/block/blkio.c -index afcec359f2..3ea9841bd8 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - QLIST_INIT(&s->bounce_bufs); - s->blkioq = blkio_get_queue(s->blkio, 0); - s->completion_fd = blkioq_get_completion_fd(s->blkioq); -+ blkioq_set_completion_fd_enabled(s->blkioq, true); - - blkio_attach_aio_context(bs, bdrv_get_aio_context(bs)); - return 0; --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch b/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch deleted file mode 100644 index f4d6e3c..0000000 --- a/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch +++ /dev/null @@ -1,67 +0,0 @@ -From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:19 +0200 -Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd` - setting fails - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s) - -qemu_open() fails if called with an unix domain socket in this way: - -blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address - -Since virtio-blk-vhost-user does not support fd passing, let`s always fall back -on 
using `path` if we fail the fd passing. - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Reported-by: Qing Wang -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-4-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 93a8f8fc5c..eef80e9ce5 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - * In order to open the device read-only, we are using the `read-only` - * property of the libblkio driver in blkio_file_open(). - */ -- fd = qemu_open(path, O_RDWR, errp); -+ fd = qemu_open(path, O_RDWR, NULL); - if (fd < 0) { -- return -EINVAL; -+ fd_supported = false; -+ } else { -+ ret = blkio_set_int(s->blkio, "fd", fd); -+ if (ret < 0) { -+ fd_supported = false; -+ qemu_close(fd); -+ } - } -+ } - -- ret = blkio_set_int(s->blkio, "fd", fd); -- if (ret < 0) { -- error_setg_errno(errp, -ret, "failed to set fd: %s", -- blkio_get_error_msg()); -- qemu_close(fd); -- return ret; -- } -- } else { -+ if (!fd_supported) { - ret = blkio_set_str(s->blkio, "path", path); - if (ret < 0) { - error_setg_errno(errp, -ret, "failed to set path: %s", --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch b/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch deleted file mode 100644 index 1c89a0b..0000000 --- a/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 4 Jul 2023 14:34:36 +0200 -Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 181: 
block/blkio: fix module_block.py parsing -RH-Bugzilla: 2213317 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm) - -When QEMU is built with --enable-modules, the module_block.py script -parses block/*.c to find block drivers that are built as modules. The -script generates a table of block drivers called block_driver_modules[]. -This table is used for block driver module loading. - -The blkio.c driver uses macros to define its BlockDriver structs. This -was done to avoid code duplication but the module_block.py script is -unable to parse the macro. The result is that libblkio-based block -drivers can be built as modules but will not be found at runtime. - -One fix is to make the module_block.py script or build system fancier so -it can parse C macros (e.g. by parsing the preprocessed source code). I -chose not to do this because it raises the complexity of the build, -making future issues harder to debug. - -Keep things simple: use the macro to avoid duplicating BlockDriver -function pointers but define .format_name and .protocol_name manually -for each BlockDriver. This way the module_block.py is able to parse the -code. - -Also get rid of the block driver name macros (e.g. DRIVER_IO_URING) -because module_block.py cannot parse them either. - -Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") -Reported-by: Qing Wang -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefano Garzarella -Message-id: 20230704123436.187761-1-stefanha@redhat.com -Cc: Stefano Garzarella -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9) - -Conflicts: -- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to - blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback. 
- -Signed-off-by: Stefan Hajnoczi ---- - block/blkio.c | 118 ++++++++++++++++++++++++++------------------------ - 1 file changed, 61 insertions(+), 57 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 6a6f20f923..afcec359f2 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -21,16 +21,6 @@ - - #include "block/block-io.h" - --/* -- * Keep the QEMU BlockDriver names identical to the libblkio driver names. -- * Using macros instead of typing out the string literals avoids typos. -- */ --#define DRIVER_IO_URING "io_uring" --#define DRIVER_NVME_IO_URING "nvme-io_uring" --#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci" --#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user" --#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa" -- - /* - * Allocated bounce buffers are kept in a list sorted by buffer address. - */ -@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - return ret; - } - -- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) { -+ if (strcmp(blkio_driver, "io_uring") == 0) { - ret = blkio_io_uring_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) { -+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { - ret = blkio_nvme_io_uring(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); - } else { - g_assert_not_reached(); -@@ -1027,50 +1017,64 @@ static 
void blkio_refresh_limits(BlockDriverState *bs, Error **errp) - * - truncate - */ - --#define BLKIO_DRIVER(name, ...) \ -- { \ -- .format_name = name, \ -- .protocol_name = name, \ -- .instance_size = sizeof(BDRVBlkioState), \ -- .bdrv_file_open = blkio_file_open, \ -- .bdrv_close = blkio_close, \ -- .bdrv_co_getlength = blkio_co_getlength, \ -- .bdrv_co_truncate = blkio_truncate, \ -- .bdrv_co_get_info = blkio_co_get_info, \ -- .bdrv_attach_aio_context = blkio_attach_aio_context, \ -- .bdrv_detach_aio_context = blkio_detach_aio_context, \ -- .bdrv_co_pdiscard = blkio_co_pdiscard, \ -- .bdrv_co_preadv = blkio_co_preadv, \ -- .bdrv_co_pwritev = blkio_co_pwritev, \ -- .bdrv_co_flush_to_disk = blkio_co_flush, \ -- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ -- .bdrv_co_io_unplug = blkio_co_io_unplug, \ -- .bdrv_refresh_limits = blkio_refresh_limits, \ -- .bdrv_register_buf = blkio_register_buf, \ -- .bdrv_unregister_buf = blkio_unregister_buf, \ -- __VA_ARGS__ \ -- } -- --static BlockDriver bdrv_io_uring = BLKIO_DRIVER( -- DRIVER_IO_URING, -- .bdrv_needs_filename = true, --); -- --static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER( -- DRIVER_NVME_IO_URING, --); -- --static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VFIO_PCI --); -+/* -+ * Do not include .format_name and .protocol_name because module_block.py -+ * does not parse macros in the source code. 
-+ */ -+#define BLKIO_DRIVER_COMMON \ -+ .instance_size = sizeof(BDRVBlkioState), \ -+ .bdrv_file_open = blkio_file_open, \ -+ .bdrv_close = blkio_close, \ -+ .bdrv_co_getlength = blkio_co_getlength, \ -+ .bdrv_co_truncate = blkio_truncate, \ -+ .bdrv_co_get_info = blkio_co_get_info, \ -+ .bdrv_attach_aio_context = blkio_attach_aio_context, \ -+ .bdrv_detach_aio_context = blkio_detach_aio_context, \ -+ .bdrv_co_pdiscard = blkio_co_pdiscard, \ -+ .bdrv_co_preadv = blkio_co_preadv, \ -+ .bdrv_co_pwritev = blkio_co_pwritev, \ -+ .bdrv_co_flush_to_disk = blkio_co_flush, \ -+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ -+ .bdrv_co_io_unplug = blkio_co_io_unplug, \ -+ .bdrv_refresh_limits = blkio_refresh_limits, \ -+ .bdrv_register_buf = blkio_register_buf, \ -+ .bdrv_unregister_buf = blkio_unregister_buf, - --static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VHOST_USER --); -+/* -+ * Use the same .format_name and .protocol_name as the libblkio driver name for -+ * consistency. 
-+ */ - --static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VHOST_VDPA --); -+static BlockDriver bdrv_io_uring = { -+ .format_name = "io_uring", -+ .protocol_name = "io_uring", -+ .bdrv_needs_filename = true, -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_nvme_io_uring = { -+ .format_name = "nvme-io_uring", -+ .protocol_name = "nvme-io_uring", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vfio_pci = { -+ .format_name = "virtio-blk-vfio-pci", -+ .protocol_name = "virtio-blk-vfio-pci", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vhost_user = { -+ .format_name = "virtio-blk-vhost-user", -+ .protocol_name = "virtio-blk-vhost-user", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vhost_vdpa = { -+ .format_name = "virtio-blk-vhost-vdpa", -+ .protocol_name = "virtio-blk-vhost-vdpa", -+ BLKIO_DRIVER_COMMON -+}; - - static void bdrv_blkio_init(void) - { --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch b/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch deleted file mode 100644 index e3ec1ee..0000000 --- a/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:17 +0200 -Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers - functions - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s) - -This is in preparation for the next patch, where for virtio-blk -drivers we need to handle the failure of blkio_connect(). 
- -Let's also rename the *_open() functions to *_connect() to make -the code reflect the changes applied. - -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-2-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 67 ++++++++++++++++++++++++++++++--------------------- - 1 file changed, 40 insertions(+), 27 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 5a82c6cb1a..85d1eed5fb 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size) - } - } - --static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, -- Error **errp) -+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *filename = qdict_get_str(options, "filename"); - BDRVBlkioState *s = bs->opaque; -@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, - } - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - return 0; - } - --static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, -- Error **errp) -+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, - return -EINVAL; - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - return 0; - } - --static int blkio_virtio_blk_common_open(BlockDriverState *bs, -- QDict *options, int flags, 
Error **errp) -+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - } - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - qdict_del(options, "path"); - - return 0; -@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - return ret; - } - -- if (strcmp(blkio_driver, "io_uring") == 0) { -- ret = blkio_io_uring_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { -- ret = blkio_nvme_io_uring(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else { -- g_assert_not_reached(); -- } -- if (ret < 0) { -- blkio_destroy(&s->blkio); -- return ret; -- } -- - if (!(flags & BDRV_O_RDWR)) { - ret = blkio_set_bool(s->blkio, "read-only", true); - if (ret < 0) { -@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - } - } - -- ret = blkio_connect(s->blkio); -+ if (strcmp(blkio_driver, "io_uring") == 0) { -+ ret = blkio_io_uring_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { -+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, 
"virtio-blk-vhost-user") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else { -+ g_assert_not_reached(); -+ } - if (ret < 0) { -- error_setg_errno(errp, -ret, "blkio_connect failed: %s", -- blkio_get_error_msg()); - blkio_destroy(&s->blkio); - return ret; - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch b/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch deleted file mode 100644 index 5ec9e0b..0000000 --- a/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch +++ /dev/null @@ -1,85 +0,0 @@ -From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:18 +0200 -Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using - `fd` - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s) - -libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa -driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use -qemu_open() to support fd passing for virtio-blk") we are using -`blkio_get_int(..., "fd")` to check if the "fd" property is supported -for all the virtio-blk-* driver. - -Unfortunately that property is also available for those driver that do -not support it, such as virtio-blk-vhost-user. - -So, `blkio_get_int()` is not enough to check whether the driver supports -the `fd` property or not. 
This is because the virito-blk common libblkio -driver only checks whether or not `fd` is set during `blkio_connect()` -and fails with -EINVAL for those transports that do not support it -(all except vhost-vdpa for now). - -So let's handle the `blkio_connect()` failure, retrying it using `path` -directly. - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Suggested-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-3-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 29 +++++++++++++++++++++++++++++ - 1 file changed, 29 insertions(+) - -diff --git a/block/blkio.c b/block/blkio.c -index 85d1eed5fb..93a8f8fc5c 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - } - - ret = blkio_connect(s->blkio); -+ /* -+ * If the libblkio driver doesn't support the `fd` property, blkio_connect() -+ * will fail with -EINVAL. So let's try calling blkio_connect() again by -+ * directly setting `path`. -+ */ -+ if (fd_supported && ret == -EINVAL) { -+ qemu_close(fd); -+ -+ /* -+ * We need to clear the `fd` property we set previously by setting -+ * it to -1. 
-+ */ -+ ret = blkio_set_int(s->blkio, "fd", -1); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set fd: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ -+ ret = blkio_set_str(s->blkio, "path", path); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set path: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ -+ ret = blkio_connect(s->blkio); -+ } -+ - if (ret < 0) { - error_setg_errno(errp, -ret, "blkio_connect failed: %s", - blkio_get_error_msg()); --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch b/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch deleted file mode 100644 index c6e1cd8..0000000 --- a/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:20 +0200 -Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd - support - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s) - -Setting the `fd` property fails with virtio-blk-* libblkio drivers -that do not support fd passing since -https://gitlab.com/libblkio/libblkio/-/merge_requests/208. - -Getting the `fd` property, on the other hand, always succeeds for -virtio-blk-* libblkio drivers even when they don't support fd passing. - -This patch switches to setting the `fd` property because it is a -better mechanism for probing fd passing support than getting the `fd` -property. 
- -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-5-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/blkio.c b/block/blkio.c -index eef80e9ce5..8defbf744f 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - return -EINVAL; - } - -- if (blkio_get_int(s->blkio, "fd", &fd) == 0) { -+ if (blkio_set_int(s->blkio, "fd", -1) == 0) { - fd_supported = true; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch b/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch deleted file mode 100644 index 3b32299..0000000 --- a/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch +++ /dev/null @@ -1,108 +0,0 @@ -From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 30 May 2023 09:19:40 +0200 -Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for - virtio-blk - -RH-Author: Stefano Garzarella -RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver -RH-Bugzilla: 2180076 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s) - -Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) supports the fd -passing. Let's expose this to the user, so the management layer -can pass the file descriptor of an already opened path. - -If the libblkio virtio-blk driver supports fd passing, let's always -use qemu_open() to open the `path`, so we can handle fd passing -from the management layer through the "/dev/fdset/N" special path. 
- -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230530071941.8954-2-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 44 insertions(+), 9 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 0cdc99a729..6a6f20f923 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -- int ret; -+ bool fd_supported = false; -+ int fd, ret; - - if (!path) { - error_setg(errp, "missing 'path' option"); - return -EINVAL; - } - -- ret = blkio_set_str(s->blkio, "path", path); -- qdict_del(options, "path"); -- if (ret < 0) { -- error_setg_errno(errp, -ret, "failed to set path: %s", -- blkio_get_error_msg()); -- return ret; -- } -- - if (!(flags & BDRV_O_NOCACHE)) { - error_setg(errp, "cache.direct=off is not supported"); - return -EINVAL; - } -+ -+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) { -+ fd_supported = true; -+ } -+ -+ /* -+ * If the libblkio driver supports fd passing, let's always use qemu_open() -+ * to open the `path`, so we can handle fd passing from the management -+ * layer through the "/dev/fdset/N" special path. 
-+ */ -+ if (fd_supported) { -+ int open_flags; -+ -+ if (flags & BDRV_O_RDWR) { -+ open_flags = O_RDWR; -+ } else { -+ open_flags = O_RDONLY; -+ } -+ -+ fd = qemu_open(path, open_flags, errp); -+ if (fd < 0) { -+ return -EINVAL; -+ } -+ -+ ret = blkio_set_int(s->blkio, "fd", fd); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set fd: %s", -+ blkio_get_error_msg()); -+ qemu_close(fd); -+ return ret; -+ } -+ } else { -+ ret = blkio_set_str(s->blkio, "path", path); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set path: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ } -+ -+ qdict_del(options, "path"); -+ - return 0; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch b/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch deleted file mode 100644 index b6eebf3..0000000 --- a/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch +++ /dev/null @@ -1,121 +0,0 @@ -From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 1 May 2023 13:34:43 -0400 -Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by - default - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm) - -reader_count() is a performance bottleneck because the global -aio_context_list_lock mutex causes thread contention. Put this debugging -assertion behind a new ./configure --enable-debug-graph-lock option and -disable it by default. - -The --enable-debug-graph-lock option is also enabled by the more general ---enable-debug option. 
- -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230501173443.153062-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf) -Signed-off-by: Kevin Wolf ---- - block/graph-lock.c | 3 +++ - configure | 1 + - meson.build | 2 ++ - meson_options.txt | 2 ++ - scripts/meson-buildoptions.sh | 4 ++++ - 5 files changed, 12 insertions(+) - -diff --git a/block/graph-lock.c b/block/graph-lock.c -index 454c31e691..259a7a0bde 100644 ---- a/block/graph-lock.c -+++ b/block/graph-lock.c -@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void) - - void assert_bdrv_graph_readable(void) - { -+ /* reader_count() is slow due to aio_context_list_lock lock contention */ -+#ifdef CONFIG_DEBUG_GRAPH_LOCK - assert(qemu_in_main_thread() || reader_count()); -+#endif - } - - void assert_bdrv_graph_writable(void) -diff --git a/configure b/configure -index 800b5850f4..a62a3e6be9 100755 ---- a/configure -+++ b/configure -@@ -806,6 +806,7 @@ for opt do - --enable-debug) - # Enable debugging options that aren't excessively noisy - debug_tcg="yes" -+ meson_option_parse --enable-debug-graph-lock "" - meson_option_parse --enable-debug-mutex "" - meson_option_add -Doptimization=0 - fortify_source="no" -diff --git a/meson.build b/meson.build -index c44d05a13f..d964e741e7 100644 ---- a/meson.build -+++ b/meson.build -@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool - have_coroutine_pool = false - endif - config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) -+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) - config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) - config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) - config_host_data.set('CONFIG_GPROF', get_option('gprof')) -@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')} - summary_info += {'static build': 
config_host.has_key('CONFIG_STATIC')} - summary_info += {'malloc trim support': has_malloc_trim} - summary_info += {'membarrier': have_membarrier} -+summary_info += {'debug graph lock': get_option('debug_graph_lock')} - summary_info += {'debug stack usage': get_option('debug_stack_usage')} - summary_info += {'mutex debugging': get_option('debug_mutex')} - summary_info += {'memory allocator': get_option('malloc')} -diff --git a/meson_options.txt b/meson_options.txt -index fc9447d267..bc857fe68b 100644 ---- a/meson_options.txt -+++ b/meson_options.txt -@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false, - description: 'dummy RNG, avoid using /dev/(u)random and getrandom()') - option('coroutine_pool', type: 'boolean', value: true, - description: 'coroutine freelist (better performance)') -+option('debug_graph_lock', type: 'boolean', value: false, -+ description: 'graph lock debugging support') - option('debug_mutex', type: 'boolean', value: false, - description: 'mutex debugging support') - option('debug_stack_usage', type: 'boolean', value: false, -diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh -index 009fab1515..30e1f25259 100644 ---- a/scripts/meson-buildoptions.sh -+++ b/scripts/meson-buildoptions.sh -@@ -21,6 +21,8 @@ meson_options_help() { - printf "%s\n" ' QEMU' - printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' - printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' -+ printf "%s\n" ' --enable-debug-graph-lock' -+ printf "%s\n" ' graph lock debugging support' - printf "%s\n" ' --enable-debug-mutex mutex debugging support' - printf "%s\n" ' --enable-debug-stack-usage' - printf "%s\n" ' measure coroutine stack usage' -@@ -249,6 +251,8 @@ _meson_option_parse() { - --datadir=*) quote_sh "-Ddatadir=$2" ;; - --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;; - --disable-dbus-display) printf "%s" -Ddbus_display=disabled ;; -+ --enable-debug-graph-lock) printf "%s" 
-Ddebug_graph_lock=true ;; -+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;; - --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;; - --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;; - --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;; --- -2.39.3 - diff --git a/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch b/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch new file mode 100644 index 0000000..df764fb --- /dev/null +++ b/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch @@ -0,0 +1,69 @@ +From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 12 Sep 2023 19:10:37 -0400 +Subject: [PATCH 095/101] block-coroutine-wrapper: use + qemu_get_current_aio_context() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm) + +Use qemu_get_current_aio_context() in mixed wrappers and coroutine +wrappers so that code runs in the caller's AioContext instead of moving +to the BlockDriverState's AioContext. This change is necessary for the +multi-queue block layer where any thread can call into the block layer. + +Most wrappers are IO_CODE where it's safe to use the current AioContext +nowadays. BlockDrivers and the core block layer use their own locks and +no longer depend on the AioContext lock for thread-safety. + +The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current +AioContext is safe because this code is only called with the BQL held +from the main loop thread. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230912231037.826804-6-stefanha@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + scripts/block-coroutine-wrapper.py | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py +index c9c09fcacd..dbbde99e39 100644 +--- a/scripts/block-coroutine-wrapper.py ++++ b/scripts/block-coroutine-wrapper.py +@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str, + f"{self.name}") + self.target_name = f'{subsystem}_{subname}' + +- self.ctx = self.gen_ctx() +- + self.get_result = 's->ret = ' + self.ret = 'return s.ret;' + self.co_ret = 'return ' +@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str: + {func.co_ret}{name}({ func.gen_list('{name}') }); + }} else {{ + {struct_name} s = {{ +- .poll_state.ctx = {func.ctx}, ++ .poll_state.ctx = qemu_get_current_aio_context(), + .poll_state.in_progress = true, + + { func.gen_block(' .{name} = {name},') } +@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str: + {func.return_type} {func.name}({ func.gen_list('{decl}') }) + {{ + {struct_name} s = {{ +- .poll_state.ctx = {func.ctx}, ++ .poll_state.ctx = qemu_get_current_aio_context(), + .poll_state.in_progress = true, + + { func.gen_block(' .{name} = {name},') } +-- +2.39.3 + diff --git a/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch b/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch new file mode 100644 index 0000000..1783a64 --- /dev/null +++ b/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch @@ -0,0 +1,217 @@ +From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 14 Sep 2023 10:00:58 -0400 +Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in + the current thread + +RH-Author: Kevin Wolf +RH-MergeRequest: 
214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm) + +The file-posix block driver currently only sets up Linux AIO and +io_uring in the BDS's AioContext. In the multi-queue block layer we must +be able to submit I/O requests in AioContexts that do not have Linux AIO +and io_uring set up yet since any thread can call into the block driver. + +Set up Linux AIO and io_uring for the current AioContext during request +submission. We lose the ability to return an error from +.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to +resource limits). Instead the user only gets warnings and we fall back +to aio=threads. This is still better than a fatal error after startup. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230914140101.1065008-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +--- + block/file-posix.c | 103 ++++++++++++++++++++++----------------------- + 1 file changed, 51 insertions(+), 52 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index b862406c71..35684f7e21 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + + #ifdef CONFIG_LINUX_AIO + /* Currently Linux does AIO only for files opened with O_DIRECT */ +- if (s->use_linux_aio) { +- if (!(s->open_flags & O_DIRECT)) { +- error_setg(errp, "aio=native was specified, but it requires " +- "cache.direct=on, which was not specified."); +- ret = -EINVAL; +- goto fail; +- } +- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) { +- error_prepend(errp, "Unable to use native AIO: "); +- goto fail; +- } ++ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { ++ error_setg(errp, "aio=native was specified, but it requires " ++ "cache.direct=on, which was not specified."); ++ ret = 
-EINVAL; ++ goto fail; + } + #else + if (s->use_linux_aio) { +@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + } + #endif /* !defined(CONFIG_LINUX_AIO) */ + +-#ifdef CONFIG_LINUX_IO_URING +- if (s->use_linux_io_uring) { +- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) { +- error_prepend(errp, "Unable to use io_uring: "); +- goto fail; +- } +- } +-#else ++#ifndef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + error_setg(errp, "aio=io_uring was specified, but is not supported " + "in this build."); +@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) + return true; + } + ++#ifdef CONFIG_LINUX_IO_URING ++static inline bool raw_check_linux_io_uring(BDRVRawState *s) ++{ ++ Error *local_err = NULL; ++ AioContext *ctx; ++ ++ if (!s->use_linux_io_uring) { ++ return false; ++ } ++ ++ ctx = qemu_get_current_aio_context(); ++ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) { ++ error_reportf_err(local_err, "Unable to use linux io_uring, " ++ "falling back to thread pool: "); ++ s->use_linux_io_uring = false; ++ return false; ++ } ++ return true; ++} ++#endif ++ ++#ifdef CONFIG_LINUX_AIO ++static inline bool raw_check_linux_aio(BDRVRawState *s) ++{ ++ Error *local_err = NULL; ++ AioContext *ctx; ++ ++ if (!s->use_linux_aio) { ++ return false; ++ } ++ ++ ctx = qemu_get_current_aio_context(); ++ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) { ++ error_reportf_err(local_err, "Unable to use Linux AIO, " ++ "falling back to thread pool: "); ++ s->use_linux_aio = false; ++ return false; ++ } ++ return true; ++} ++#endif ++ + static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, + uint64_t bytes, QEMUIOVector *qiov, int type) + { +@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, + if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { + type |= QEMU_AIO_MISALIGNED; + #ifdef 
CONFIG_LINUX_IO_URING +- } else if (s->use_linux_io_uring) { ++ } else if (raw_check_linux_io_uring(s)) { + assert(qiov->size == bytes); + ret = luring_co_submit(bs, s->fd, offset, qiov, type); + goto out; + #endif + #ifdef CONFIG_LINUX_AIO +- } else if (s->use_linux_aio) { ++ } else if (raw_check_linux_aio(s)) { + assert(qiov->size == bytes); + ret = laio_co_submit(s->fd, offset, qiov, type, + s->aio_max_batch); +@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) + }; + + #ifdef CONFIG_LINUX_IO_URING +- if (s->use_linux_io_uring) { ++ if (raw_check_linux_io_uring(s)) { + return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); + } + #endif + return raw_thread_pool_submit(handle_aiocb_flush, &acb); + } + +-static void raw_aio_attach_aio_context(BlockDriverState *bs, +- AioContext *new_context) +-{ +- BDRVRawState __attribute__((unused)) *s = bs->opaque; +-#ifdef CONFIG_LINUX_AIO +- if (s->use_linux_aio) { +- Error *local_err = NULL; +- if (!aio_setup_linux_aio(new_context, &local_err)) { +- error_reportf_err(local_err, "Unable to use native AIO, " +- "falling back to thread pool: "); +- s->use_linux_aio = false; +- } +- } +-#endif +-#ifdef CONFIG_LINUX_IO_URING +- if (s->use_linux_io_uring) { +- Error *local_err = NULL; +- if (!aio_setup_linux_io_uring(new_context, &local_err)) { +- error_reportf_err(local_err, "Unable to use linux io_uring, " +- "falling back to thread pool: "); +- s->use_linux_io_uring = false; +- } +- } +-#endif +-} +- + static void raw_close(BlockDriverState *bs) + { + BDRVRawState *s = bs->opaque; +@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = { + .bdrv_co_copy_range_from = raw_co_copy_range_from, + .bdrv_co_copy_range_to = raw_co_copy_range_to, + .bdrv_refresh_limits = raw_refresh_limits, +- .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_co_getlength = raw_co_getlength, +@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = { + 
.bdrv_co_copy_range_from = raw_co_copy_range_from, + .bdrv_co_copy_range_to = raw_co_copy_range_to, + .bdrv_refresh_limits = raw_refresh_limits, +- .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_co_getlength = raw_co_getlength, +@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_refresh_limits = cdrom_refresh_limits, +- .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_co_getlength = raw_co_getlength, +@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_refresh_limits = cdrom_refresh_limits, +- .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_co_getlength = raw_co_getlength, +-- +2.39.3 + diff --git a/SOURCES/kvm-block-remove-AioContext-locking.patch b/SOURCES/kvm-block-remove-AioContext-locking.patch new file mode 100644 index 0000000..5bcd859 --- /dev/null +++ b/SOURCES/kvm-block-remove-AioContext-locking.patch @@ -0,0 +1,4438 @@ +From df1400991580e8a60d711079865b56ed95830b28 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:03 -0500 +Subject: [PATCH 086/101] block: remove AioContext locking + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [17/26] b29c3ac7ea91ca356335ba047c66187317c482f9 (kmwolf/centos-qemu-kvm) + +This is the big patch that removes +aio_context_acquire()/aio_context_release() from the block layer and +affected block layer users. + +There isn't a clean way to split this patch and the reviewers are likely +the same group of people, so I decided to do it in one patch. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Reviewed-by: Paul Durrant +Message-ID: <20231205182011.1976568-7-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + block.c | 234 +--------------------- + block/block-backend.c | 14 -- + block/copy-before-write.c | 22 +-- + block/export/export.c | 22 +-- + block/io.c | 45 +---- + block/mirror.c | 19 -- + block/monitor/bitmap-qmp-cmds.c | 20 +- + block/monitor/block-hmp-cmds.c | 29 --- + block/qapi-sysemu.c | 27 +-- + block/qapi.c | 18 +- + block/raw-format.c | 5 - + block/replication.c | 58 +----- + block/snapshot.c | 22 +-- + block/write-threshold.c | 6 - + blockdev.c | 307 +++++------------------------ + blockjob.c | 18 -- + hw/block/dataplane/virtio-blk.c | 10 - + hw/block/dataplane/xen-block.c | 17 +- + hw/block/virtio-blk.c | 13 -- + hw/core/qdev-properties-system.c | 9 - + include/block/block-global-state.h | 9 +- + include/block/block-io.h | 3 +- + include/block/snapshot.h | 2 - + job.c | 16 -- + migration/block.c | 34 +--- + migration/migration-hmp-cmds.c | 3 - + migration/savevm.c | 22 --- + net/colo-compare.c | 2 - + qemu-img.c | 4 - + qemu-io.c | 10 +- + qemu-nbd.c | 2 - + replay/replay-debugging.c | 4 - + scripts/block-coroutine-wrapper.py | 3 - + tests/tsan/suppressions.tsan | 1 - + tests/unit/test-bdrv-drain.c | 51 +---- + tests/unit/test-bdrv-graph-mod.c | 6 - + tests/unit/test-block-iothread.c | 31 --- + tests/unit/test-blockjob.c | 137 ------------- + tests/unit/test-replication.c | 11 -- + util/async.c | 4 - + util/vhost-user-server.c | 3 - + 41 files changed, 104 insertions(+), 1169 deletions(-) + +diff --git a/block.c b/block.c +index 25e1ebc606..91ace5d2d5 100644 +--- a/block.c ++++ b/block.c +@@ -1625,7 +1625,6 @@ static int no_coroutine_fn GRAPH_UNLOCKED + bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + QDict *options, int open_flags, Error **errp) + { +- AioContext *ctx; + Error *local_err = NULL; + int i, ret; + 
GLOBAL_STATE_CODE(); +@@ -1673,21 +1672,15 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; + bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; + +- /* Get the context after .bdrv_open, it can change the context */ +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- + ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); +- aio_context_release(ctx); + return ret; + } + + bdrv_graph_rdlock_main_loop(); + bdrv_refresh_limits(bs, NULL, &local_err); + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(ctx); + + if (local_err) { + error_propagate(errp, local_err); +@@ -3062,7 +3055,7 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + Transaction *tran, Error **errp) + { + BdrvChild *new_child; +- AioContext *parent_ctx, *new_child_ctx; ++ AioContext *parent_ctx; + AioContext *child_ctx = bdrv_get_aio_context(child_bs); + + assert(child_class->get_parent_desc); +@@ -3114,12 +3107,6 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + } + } + +- new_child_ctx = bdrv_get_aio_context(child_bs); +- if (new_child_ctx != child_ctx) { +- aio_context_release(child_ctx); +- aio_context_acquire(new_child_ctx); +- } +- + bdrv_ref(child_bs); + /* + * Let every new BdrvChild start with a drained parent. 
Inserting the child +@@ -3149,11 +3136,6 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + }; + tran_add(tran, &bdrv_attach_child_common_drv, s); + +- if (new_child_ctx != child_ctx) { +- aio_context_release(new_child_ctx); +- aio_context_acquire(child_ctx); +- } +- + return new_child; + } + +@@ -3605,7 +3587,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + int ret = 0; + bool implicit_backing = false; + BlockDriverState *backing_hd; +- AioContext *backing_hd_ctx; + QDict *options; + QDict *tmp_parent_options = NULL; + Error *local_err = NULL; +@@ -3691,11 +3672,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + + /* Hook up the backing file link; drop our reference, bs owns the + * backing_hd reference now */ +- backing_hd_ctx = bdrv_get_aio_context(backing_hd); +- aio_context_acquire(backing_hd_ctx); + ret = bdrv_set_backing_hd(bs, backing_hd, errp); + bdrv_unref(backing_hd); +- aio_context_release(backing_hd_ctx); + + if (ret < 0) { + goto free_exit; +@@ -3780,7 +3758,6 @@ BdrvChild *bdrv_open_child(const char *filename, + { + BlockDriverState *bs; + BdrvChild *child; +- AioContext *ctx; + + GLOBAL_STATE_CODE(); + +@@ -3791,11 +3768,8 @@ BdrvChild *bdrv_open_child(const char *filename, + } + + bdrv_graph_wrlock(); +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, + errp); +- aio_context_release(ctx); + bdrv_graph_wrunlock(); + + return child; +@@ -3881,7 +3855,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, + int64_t total_size; + QemuOpts *opts = NULL; + BlockDriverState *bs_snapshot = NULL; +- AioContext *ctx = bdrv_get_aio_context(bs); + int ret; + + GLOBAL_STATE_CODE(); +@@ -3890,9 +3863,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, + instead of opening 'filename' directly */ + + /* Get the required size from the image */ +- 
aio_context_acquire(ctx); + total_size = bdrv_getlength(bs); +- aio_context_release(ctx); + + if (total_size < 0) { + error_setg_errno(errp, -total_size, "Could not get image size"); +@@ -3927,10 +3898,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, + goto out; + } + +- aio_context_acquire(ctx); + ret = bdrv_append(bs_snapshot, bs, errp); +- aio_context_release(ctx); +- + if (ret < 0) { + bs_snapshot = NULL; + goto out; +@@ -3974,7 +3942,6 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + Error *local_err = NULL; + QDict *snapshot_options = NULL; + int snapshot_flags = 0; +- AioContext *ctx = qemu_get_aio_context(); + + assert(!child_class || !flags); + assert(!child_class == !parent); +@@ -4115,12 +4082,10 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + /* Not requesting BLK_PERM_CONSISTENT_READ because we're only + * looking at the header to guess the image format. This works even + * in cases where a guest would not see a consistent state. */ +- ctx = bdrv_get_aio_context(file_bs); +- aio_context_acquire(ctx); ++ AioContext *ctx = bdrv_get_aio_context(file_bs); + file = blk_new(ctx, 0, BLK_PERM_ALL); + blk_insert_bs(file, file_bs, &local_err); + bdrv_unref(file_bs); +- aio_context_release(ctx); + + if (local_err) { + goto fail; +@@ -4167,13 +4132,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + goto fail; + } + +- /* The AioContext could have changed during bdrv_open_common() */ +- ctx = bdrv_get_aio_context(bs); +- + if (file) { +- aio_context_acquire(ctx); + blk_unref(file); +- aio_context_release(ctx); + file = NULL; + } + +@@ -4231,16 +4191,13 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + * (snapshot_bs); thus, we have to drop the strong reference to bs + * (which we obtained by calling bdrv_new()). bs will not be deleted, + * though, because the overlay still has a reference to it. 
*/ +- aio_context_acquire(ctx); + bdrv_unref(bs); +- aio_context_release(ctx); + bs = snapshot_bs; + } + + return bs; + + fail: +- aio_context_acquire(ctx); + blk_unref(file); + qobject_unref(snapshot_options); + qobject_unref(bs->explicit_options); +@@ -4249,14 +4206,11 @@ fail: + bs->options = NULL; + bs->explicit_options = NULL; + bdrv_unref(bs); +- aio_context_release(ctx); + error_propagate(errp, local_err); + return NULL; + + close_and_fail: +- aio_context_acquire(ctx); + bdrv_unref(bs); +- aio_context_release(ctx); + qobject_unref(snapshot_options); + qobject_unref(options); + error_propagate(errp, local_err); +@@ -4540,12 +4494,7 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) + if (bs_queue) { + BlockReopenQueueEntry *bs_entry, *next; + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { +- AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); +- +- aio_context_acquire(ctx); + bdrv_drained_end(bs_entry->state.bs); +- aio_context_release(ctx); +- + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + g_free(bs_entry); +@@ -4577,7 +4526,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + { + int ret = -1; + BlockReopenQueueEntry *bs_entry, *next; +- AioContext *ctx; + Transaction *tran = tran_new(); + g_autoptr(GSList) refresh_list = NULL; + +@@ -4586,10 +4534,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + GLOBAL_STATE_CODE(); + + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { +- ctx = bdrv_get_aio_context(bs_entry->state.bs); +- aio_context_acquire(ctx); + ret = bdrv_flush(bs_entry->state.bs); +- aio_context_release(ctx); + if (ret < 0) { + error_setg_errno(errp, -ret, "Error flushing drive"); + goto abort; +@@ -4598,10 +4543,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { + assert(bs_entry->state.bs->quiesce_counter > 0); +- ctx = bdrv_get_aio_context(bs_entry->state.bs); +- 
aio_context_acquire(ctx); + ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); +- aio_context_release(ctx); + if (ret < 0) { + goto abort; + } +@@ -4644,10 +4586,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + * to first element. + */ + QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { +- ctx = bdrv_get_aio_context(bs_entry->state.bs); +- aio_context_acquire(ctx); + bdrv_reopen_commit(&bs_entry->state); +- aio_context_release(ctx); + } + + bdrv_graph_wrlock(); +@@ -4658,10 +4597,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + BlockDriverState *bs = bs_entry->state.bs; + + if (bs->drv->bdrv_reopen_commit_post) { +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + bs->drv->bdrv_reopen_commit_post(&bs_entry->state); +- aio_context_release(ctx); + } + } + +@@ -4675,10 +4611,7 @@ abort: + + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (bs_entry->prepared) { +- ctx = bdrv_get_aio_context(bs_entry->state.bs); +- aio_context_acquire(ctx); + bdrv_reopen_abort(&bs_entry->state); +- aio_context_release(ctx); + } + } + +@@ -4691,24 +4624,13 @@ cleanup: + int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp) + { +- AioContext *ctx = bdrv_get_aio_context(bs); + BlockReopenQueue *queue; +- int ret; + + GLOBAL_STATE_CODE(); + + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + +- if (ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } +- ret = bdrv_reopen_multiple(queue, errp); +- +- if (ctx != qemu_get_aio_context()) { +- aio_context_acquire(ctx); +- } +- +- return ret; ++ return bdrv_reopen_multiple(queue, errp); + } + + int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, +@@ -4760,7 +4682,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + const char *child_name = is_backing ? 
"backing" : "file"; + QObject *value; + const char *str; +- AioContext *ctx, *old_ctx; + bool has_child; + int ret; + +@@ -4844,13 +4765,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + bdrv_drained_begin(old_child_bs); + } + +- old_ctx = bdrv_get_aio_context(bs); +- ctx = bdrv_get_aio_context(new_child_bs); +- if (old_ctx != ctx) { +- aio_context_release(old_ctx); +- aio_context_acquire(ctx); +- } +- + bdrv_graph_rdunlock_main_loop(); + bdrv_graph_wrlock(); + +@@ -4859,11 +4773,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + + bdrv_graph_wrunlock(); + +- if (old_ctx != ctx) { +- aio_context_release(ctx); +- aio_context_acquire(old_ctx); +- } +- + if (old_child_bs) { + bdrv_drained_end(old_child_bs); + bdrv_unref(old_child_bs); +@@ -5537,7 +5446,6 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + int ret; + BdrvChild *child; + Transaction *tran = tran_new(); +- AioContext *old_context, *new_context = NULL; + + GLOBAL_STATE_CODE(); + +@@ -5545,21 +5453,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + assert(!bs_new->backing); + bdrv_graph_rdunlock_main_loop(); + +- old_context = bdrv_get_aio_context(bs_top); + bdrv_drained_begin(bs_top); +- +- /* +- * bdrv_drained_begin() requires that only the AioContext of the drained +- * node is locked, and at this point it can still differ from the AioContext +- * of bs_top. +- */ +- new_context = bdrv_get_aio_context(bs_new); +- aio_context_release(old_context); +- aio_context_acquire(new_context); + bdrv_drained_begin(bs_new); +- aio_context_release(new_context); +- aio_context_acquire(old_context); +- new_context = NULL; + + bdrv_graph_wrlock(); + +@@ -5571,18 +5466,6 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + goto out; + } + +- /* +- * bdrv_attach_child_noperm could change the AioContext of bs_top and +- * bs_new, but at least they are in the same AioContext now. 
This is the +- * AioContext that we need to lock for the rest of the function. +- */ +- new_context = bdrv_get_aio_context(bs_top); +- +- if (old_context != new_context) { +- aio_context_release(old_context); +- aio_context_acquire(new_context); +- } +- + ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); + if (ret < 0) { + goto out; +@@ -5598,11 +5481,6 @@ out: + bdrv_drained_end(bs_top); + bdrv_drained_end(bs_new); + +- if (new_context && old_context != new_context) { +- aio_context_release(new_context); +- aio_context_acquire(old_context); +- } +- + return ret; + } + +@@ -5697,12 +5575,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, + + GLOBAL_STATE_CODE(); + +- aio_context_release(ctx); +- aio_context_acquire(qemu_get_aio_context()); + new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, + errp); +- aio_context_release(qemu_get_aio_context()); +- aio_context_acquire(ctx); + assert(bdrv_get_aio_context(bs) == ctx); + + options = NULL; /* bdrv_new_open_driver() eats options */ +@@ -7037,12 +6911,9 @@ void bdrv_activate_all(Error **errp) + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { +- AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + +- aio_context_acquire(aio_context); + ret = bdrv_activate(bs, errp); +- aio_context_release(aio_context); + if (ret < 0) { + bdrv_next_cleanup(&it); + return; +@@ -7137,20 +7008,10 @@ int bdrv_inactivate_all(void) + BlockDriverState *bs = NULL; + BdrvNextIterator it; + int ret = 0; +- GSList *aio_ctxs = NULL, *ctx; + + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- if (!g_slist_find(aio_ctxs, aio_context)) { +- aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); +- aio_context_acquire(aio_context); +- } +- } +- + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + /* Nodes with BDS 
parents are covered by recursion from the last + * parent that gets inactivated. Don't inactivate them a second +@@ -7161,17 +7022,10 @@ int bdrv_inactivate_all(void) + ret = bdrv_inactivate_recurse(bs); + if (ret < 0) { + bdrv_next_cleanup(&it); +- goto out; ++ break; + } + } + +-out: +- for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { +- AioContext *aio_context = ctx->data; +- aio_context_release(aio_context); +- } +- g_slist_free(aio_ctxs); +- + return ret; + } + +@@ -7257,11 +7111,8 @@ void bdrv_unref(BlockDriverState *bs) + static void bdrv_schedule_unref_bh(void *opaque) + { + BlockDriverState *bs = opaque; +- AioContext *ctx = bdrv_get_aio_context(bs); + +- aio_context_acquire(ctx); + bdrv_unref(bs); +- aio_context_release(ctx); + } + + /* +@@ -7398,8 +7249,6 @@ void bdrv_img_create(const char *filename, const char *fmt, + return; + } + +- aio_context_acquire(qemu_get_aio_context()); +- + /* Create parameter list */ + create_opts = qemu_opts_append(create_opts, drv->create_opts); + create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); +@@ -7549,7 +7398,6 @@ out: + qemu_opts_del(opts); + qemu_opts_free(create_opts); + error_propagate(errp, local_err); +- aio_context_release(qemu_get_aio_context()); + } + + AioContext *bdrv_get_aio_context(BlockDriverState *bs) +@@ -7585,29 +7433,12 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) + + void coroutine_fn bdrv_co_lock(BlockDriverState *bs) + { +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- /* In the main thread, bs->aio_context won't change concurrently */ +- assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +- +- /* +- * We're in coroutine context, so we already hold the lock of the main +- * loop AioContext. Don't lock it twice to avoid deadlocks. 
+- */ +- assert(qemu_in_coroutine()); +- if (ctx != qemu_get_aio_context()) { +- aio_context_acquire(ctx); +- } ++ /* TODO removed in next patch */ + } + + void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) + { +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- assert(qemu_in_coroutine()); +- if (ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } ++ /* TODO removed in next patch */ + } + + static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) +@@ -7728,21 +7559,8 @@ static void bdrv_set_aio_context_commit(void *opaque) + BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; + BlockDriverState *bs = (BlockDriverState *) state->bs; + AioContext *new_context = state->new_ctx; +- AioContext *old_context = bdrv_get_aio_context(bs); + +- /* +- * Take the old AioContex when detaching it from bs. +- * At this point, new_context lock is already acquired, and we are now +- * also taking old_context. This is safe as long as bdrv_detach_aio_context +- * does not call AIO_POLL_WHILE(). +- */ +- if (old_context != qemu_get_aio_context()) { +- aio_context_acquire(old_context); +- } + bdrv_detach_aio_context(bs); +- if (old_context != qemu_get_aio_context()) { +- aio_context_release(old_context); +- } + bdrv_attach_aio_context(bs, new_context); + } + +@@ -7827,7 +7645,6 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + Transaction *tran; + GHashTable *visited; + int ret; +- AioContext *old_context = bdrv_get_aio_context(bs); + GLOBAL_STATE_CODE(); + + /* +@@ -7857,34 +7674,7 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + return -EPERM; + } + +- /* +- * Release old AioContext, it won't be needed anymore, as all +- * bdrv_drained_begin() have been called already. 
+- */ +- if (qemu_get_aio_context() != old_context) { +- aio_context_release(old_context); +- } +- +- /* +- * Acquire new AioContext since bdrv_drained_end() is going to be called +- * after we switched all nodes in the new AioContext, and the function +- * assumes that the lock of the bs is always taken. +- */ +- if (qemu_get_aio_context() != ctx) { +- aio_context_acquire(ctx); +- } +- + tran_commit(tran); +- +- if (qemu_get_aio_context() != ctx) { +- aio_context_release(ctx); +- } +- +- /* Re-acquire the old AioContext, since the caller takes and releases it. */ +- if (qemu_get_aio_context() != old_context) { +- aio_context_acquire(old_context); +- } +- + return 0; + } + +@@ -8006,7 +7796,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + const char *node_name, Error **errp) + { + BlockDriverState *to_replace_bs = bdrv_find_node(node_name); +- AioContext *aio_context; + + GLOBAL_STATE_CODE(); + +@@ -8015,12 +7804,8 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + return NULL; + } + +- aio_context = bdrv_get_aio_context(to_replace_bs); +- aio_context_acquire(aio_context); +- + if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { +- to_replace_bs = NULL; +- goto out; ++ return NULL; + } + + /* We don't want arbitrary node of the BDS chain to be replaced only the top +@@ -8033,12 +7818,9 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + "because it cannot be guaranteed that doing so would not " + "lead to an abrupt change of visible data", + node_name, parent_bs->node_name); +- to_replace_bs = NULL; +- goto out; ++ return NULL; + } + +-out: +- aio_context_release(aio_context); + return to_replace_bs; + } + +diff --git a/block/block-backend.c b/block/block-backend.c +index abac4e0235..f412bed274 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -429,7 +429,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, + { + BlockBackend *blk; + 
BlockDriverState *bs; +- AioContext *ctx; + uint64_t perm = 0; + uint64_t shared = BLK_PERM_ALL; + +@@ -459,23 +458,18 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, + shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; + } + +- aio_context_acquire(qemu_get_aio_context()); + bs = bdrv_open(filename, reference, options, flags, errp); +- aio_context_release(qemu_get_aio_context()); + if (!bs) { + return NULL; + } + + /* bdrv_open() could have moved bs to a different AioContext */ +- ctx = bdrv_get_aio_context(bs); + blk = blk_new(bdrv_get_aio_context(bs), perm, shared); + blk->perm = perm; + blk->shared_perm = shared; + +- aio_context_acquire(ctx); + blk_insert_bs(blk, bs, errp); + bdrv_unref(bs); +- aio_context_release(ctx); + + if (!blk->root) { + blk_unref(blk); +@@ -577,13 +571,9 @@ void blk_remove_all_bs(void) + GLOBAL_STATE_CODE(); + + while ((blk = blk_all_next(blk)) != NULL) { +- AioContext *ctx = blk_get_aio_context(blk); +- +- aio_context_acquire(ctx); + if (blk->root) { + blk_remove_bs(blk); + } +- aio_context_release(ctx); + } + } + +@@ -2736,20 +2726,16 @@ int blk_commit_all(void) + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + while ((blk = blk_all_next(blk)) != NULL) { +- AioContext *aio_context = blk_get_aio_context(blk); + BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk)); + +- aio_context_acquire(aio_context); + if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) { + int ret; + + ret = bdrv_commit(unfiltered_bs); + if (ret < 0) { +- aio_context_release(aio_context); + return ret; + } + } +- aio_context_release(aio_context); + } + return 0; + } +diff --git a/block/copy-before-write.c b/block/copy-before-write.c +index 13972879b1..0842a1a6df 100644 +--- a/block/copy-before-write.c ++++ b/block/copy-before-write.c +@@ -412,7 +412,6 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + int64_t cluster_size; + g_autoptr(BlockdevOptions) full_opts = NULL; + BlockdevOptionsCbw *opts; +- 
AioContext *ctx; + int ret; + + full_opts = cbw_parse_options(options, errp); +@@ -435,15 +434,11 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- + if (opts->bitmap) { + bitmap = block_dirty_bitmap_lookup(opts->bitmap->node, + opts->bitmap->name, NULL, errp); + if (!bitmap) { +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + } + s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error : +@@ -461,24 +456,21 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp); + if (!s->bcs) { + error_prepend(errp, "Cannot create block-copy-state: "); +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + + cluster_size = block_copy_cluster_size(s->bcs); + + s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); + if (!s->done_bitmap) { +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + bdrv_disable_dirty_bitmap(s->done_bitmap); + + /* s->access_bitmap starts equal to bcs bitmap */ + s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); + if (!s->access_bitmap) { +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + bdrv_disable_dirty_bitmap(s->access_bitmap); + bdrv_dirty_bitmap_merge_internal(s->access_bitmap, +@@ -487,11 +479,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + + qemu_co_mutex_init(&s->lock); + QLIST_INIT(&s->frozen_read_reqs); +- +- ret = 0; +-out: +- aio_context_release(ctx); +- return ret; ++ return 0; + } + + static void cbw_close(BlockDriverState *bs) +diff --git a/block/export/export.c b/block/export/export.c +index a8f274e526..6d51ae8ed7 100644 +--- a/block/export/export.c ++++ b/block/export/export.c +@@ -114,7 +114,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + } + + ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + + if 
(export->iothread) { + IOThread *iothread; +@@ -133,8 +132,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + set_context_errp = fixed_iothread ? errp : NULL; + ret = bdrv_try_change_aio_context(bs, new_ctx, NULL, set_context_errp); + if (ret == 0) { +- aio_context_release(ctx); +- aio_context_acquire(new_ctx); + ctx = new_ctx; + } else if (fixed_iothread) { + goto fail; +@@ -191,8 +188,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + assert(exp->blk != NULL); + + QLIST_INSERT_HEAD(&block_exports, exp, next); +- +- aio_context_release(ctx); + return exp; + + fail: +@@ -200,7 +195,6 @@ fail: + blk_set_dev_ops(blk, NULL, NULL); + blk_unref(blk); + } +- aio_context_release(ctx); + if (exp) { + g_free(exp->id); + g_free(exp); +@@ -218,9 +212,6 @@ void blk_exp_ref(BlockExport *exp) + static void blk_exp_delete_bh(void *opaque) + { + BlockExport *exp = opaque; +- AioContext *aio_context = exp->ctx; +- +- aio_context_acquire(aio_context); + + assert(exp->refcount == 0); + QLIST_REMOVE(exp, next); +@@ -230,8 +221,6 @@ static void blk_exp_delete_bh(void *opaque) + qapi_event_send_block_export_deleted(exp->id); + g_free(exp->id); + g_free(exp); +- +- aio_context_release(aio_context); + } + + void blk_exp_unref(BlockExport *exp) +@@ -249,22 +238,16 @@ void blk_exp_unref(BlockExport *exp) + * connections and other internally held references start to shut down. When + * the function returns, there may still be active references while the export + * is in the process of shutting down. +- * +- * Acquires exp->ctx internally. Callers must *not* hold the lock. + */ + void blk_exp_request_shutdown(BlockExport *exp) + { +- AioContext *aio_context = exp->ctx; +- +- aio_context_acquire(aio_context); +- + /* + * If the user doesn't own the export any more, it is already shutting + * down. We must not call .request_shutdown and decrease the refcount a + * second time. 
+ */ + if (!exp->user_owned) { +- goto out; ++ return; + } + + exp->drv->request_shutdown(exp); +@@ -272,9 +255,6 @@ void blk_exp_request_shutdown(BlockExport *exp) + assert(exp->user_owned); + exp->user_owned = false; + blk_exp_unref(exp); +- +-out: +- aio_context_release(aio_context); + } + + /* +diff --git a/block/io.c b/block/io.c +index 7e62fabbf5..8fa7670571 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -294,8 +294,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) + BlockDriverState *bs = data->bs; + + if (bs) { +- AioContext *ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { + bdrv_do_drained_begin(bs, data->parent, data->poll); +@@ -303,7 +301,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) + assert(!data->poll); + bdrv_do_drained_end(bs, data->parent); + } +- aio_context_release(ctx); + } else { + assert(data->begin); + bdrv_drain_all_begin(); +@@ -320,8 +317,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + { + BdrvCoDrainData data; + Coroutine *self = qemu_coroutine_self(); +- AioContext *ctx = bdrv_get_aio_context(bs); +- AioContext *co_ctx = qemu_coroutine_get_aio_context(self); + + /* Calling bdrv_drain() from a BH ensures the current coroutine yields and + * other coroutines run if they were queued by aio_co_enter(). */ +@@ -340,17 +335,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bdrv_inc_in_flight(bs); + } + +- /* +- * Temporarily drop the lock across yield or we would get deadlocks. +- * bdrv_co_drain_bh_cb() reaquires the lock as needed. +- * +- * When we yield below, the lock for the current context will be +- * released, so if this is actually the lock that protects bs, don't drop +- * it a second time. 
+- */ +- if (ctx != co_ctx) { +- aio_context_release(ctx); +- } + replay_bh_schedule_oneshot_event(qemu_get_aio_context(), + bdrv_co_drain_bh_cb, &data); + +@@ -358,11 +342,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + /* If we are resumed from some other event (such as an aio completion or a + * timer callback), it is a bug in the caller that should be fixed. */ + assert(data.done); +- +- /* Reacquire the AioContext of bs if we dropped it */ +- if (ctx != co_ctx) { +- aio_context_acquire(ctx); +- } + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +@@ -478,13 +457,12 @@ static bool bdrv_drain_all_poll(void) + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +- /* bdrv_drain_poll() can't make changes to the graph and we are holding the +- * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ ++ /* ++ * bdrv_drain_poll() can't make changes to the graph and we hold the BQL, ++ * so iterating bdrv_next_all_states() is safe. ++ */ + while ((bs = bdrv_next_all_states(bs))) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + result |= bdrv_drain_poll(bs, NULL, true); +- aio_context_release(aio_context); + } + + return result; +@@ -525,11 +503,7 @@ void bdrv_drain_all_begin_nopoll(void) + /* Quiesce all nodes, without polling in-flight requests yet. The graph + * cannot change during this loop. 
*/ + while ((bs = bdrv_next_all_states(bs))) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- aio_context_acquire(aio_context); + bdrv_do_drained_begin(bs, NULL, false); +- aio_context_release(aio_context); + } + } + +@@ -588,11 +562,7 @@ void bdrv_drain_all_end(void) + } + + while ((bs = bdrv_next_all_states(bs))) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- aio_context_acquire(aio_context); + bdrv_do_drained_end(bs, NULL); +- aio_context_release(aio_context); + } + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +@@ -2368,15 +2338,10 @@ int bdrv_flush_all(void) + } + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- int ret; +- +- aio_context_acquire(aio_context); +- ret = bdrv_flush(bs); ++ int ret = bdrv_flush(bs); + if (ret < 0 && !result) { + result = ret; + } +- aio_context_release(aio_context); + } + + return result; +diff --git a/block/mirror.c b/block/mirror.c +index 51f9e2f17c..5145eb53e1 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -662,7 +662,6 @@ static int mirror_exit_common(Job *job) + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + BlockJob *bjob = &s->common; + MirrorBDSOpaque *bs_opaque; +- AioContext *replace_aio_context = NULL; + BlockDriverState *src; + BlockDriverState *target_bs; + BlockDriverState *mirror_top_bs; +@@ -677,7 +676,6 @@ static int mirror_exit_common(Job *job) + } + s->prepared = true; + +- aio_context_acquire(qemu_get_aio_context()); + bdrv_graph_rdlock_main_loop(); + + mirror_top_bs = s->mirror_top_bs; +@@ -742,11 +740,6 @@ static int mirror_exit_common(Job *job) + } + bdrv_graph_rdunlock_main_loop(); + +- if (s->to_replace) { +- replace_aio_context = bdrv_get_aio_context(s->to_replace); +- aio_context_acquire(replace_aio_context); +- } +- + if (s->should_complete && !abort) { + BlockDriverState *to_replace = s->to_replace ?: src; + bool ro = bdrv_is_read_only(to_replace); +@@ 
-785,9 +778,6 @@ static int mirror_exit_common(Job *job) + error_free(s->replace_blocker); + bdrv_unref(s->to_replace); + } +- if (replace_aio_context) { +- aio_context_release(replace_aio_context); +- } + g_free(s->replaces); + + /* +@@ -811,8 +801,6 @@ static int mirror_exit_common(Job *job) + bdrv_unref(mirror_top_bs); + bdrv_unref(src); + +- aio_context_release(qemu_get_aio_context()); +- + return ret; + } + +@@ -1191,24 +1179,17 @@ static void mirror_complete(Job *job, Error **errp) + + /* block all operations on to_replace bs */ + if (s->replaces) { +- AioContext *replace_aio_context; +- + s->to_replace = bdrv_find_node(s->replaces); + if (!s->to_replace) { + error_setg(errp, "Node name '%s' not found", s->replaces); + return; + } + +- replace_aio_context = bdrv_get_aio_context(s->to_replace); +- aio_context_acquire(replace_aio_context); +- + /* TODO Translate this into child freeze system. */ + error_setg(&s->replace_blocker, + "block device is in use by block-job-complete"); + bdrv_op_block_all(s->to_replace, s->replace_blocker); + bdrv_ref(s->to_replace); +- +- aio_context_release(replace_aio_context); + } + + s->should_complete = true; +diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c +index 70d01a3776..a738e7bbf7 100644 +--- a/block/monitor/bitmap-qmp-cmds.c ++++ b/block/monitor/bitmap-qmp-cmds.c +@@ -95,7 +95,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; +- AioContext *aio_context; + + if (!name || name[0] == '\0') { + error_setg(errp, "Bitmap name cannot be empty"); +@@ -107,14 +106,11 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + if (has_granularity) { + if (granularity < 512 || !is_power_of_2(granularity)) { + error_setg(errp, "Granularity must be power of 2 " + "and at least 512"); +- goto out; ++ return; + } + } 
else { + /* Default to cluster size, if available: */ +@@ -132,12 +128,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + if (persistent && + !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) + { +- goto out; ++ return; + } + + bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); + if (bitmap == NULL) { +- goto out; ++ return; + } + + if (disabled) { +@@ -145,9 +141,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + } + + bdrv_dirty_bitmap_set_persistence(bitmap, persistent); +- +-out: +- aio_context_release(aio_context); + } + + BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, +@@ -157,7 +150,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; +- AioContext *aio_context; + + GLOBAL_STATE_CODE(); + +@@ -166,19 +158,14 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + return NULL; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, + errp)) { +- aio_context_release(aio_context); + return NULL; + } + + if (bdrv_dirty_bitmap_get_persistence(bitmap) && + bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) + { +- aio_context_release(aio_context); + return NULL; + } + +@@ -190,7 +177,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + *bitmap_bs = bs; + } + +- aio_context_release(aio_context); + return release ? 
NULL : bitmap; + } + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index c729cbf1eb..bdbb5cb141 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -141,7 +141,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) + const char *id = qdict_get_str(qdict, "id"); + BlockBackend *blk; + BlockDriverState *bs; +- AioContext *aio_context; + Error *local_err = NULL; + + GLOBAL_STATE_CODE(); +@@ -168,14 +167,10 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) + return; + } + +- aio_context = blk_get_aio_context(blk); +- aio_context_acquire(aio_context); +- + bs = blk_bs(blk); + if (bs) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) { + error_report_err(local_err); +- aio_context_release(aio_context); + return; + } + +@@ -196,8 +191,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) + } else { + blk_unref(blk); + } +- +- aio_context_release(aio_context); + } + + void hmp_commit(Monitor *mon, const QDict *qdict) +@@ -213,7 +206,6 @@ void hmp_commit(Monitor *mon, const QDict *qdict) + ret = blk_commit_all(); + } else { + BlockDriverState *bs; +- AioContext *aio_context; + + blk = blk_by_name(device); + if (!blk) { +@@ -222,18 +214,13 @@ void hmp_commit(Monitor *mon, const QDict *qdict) + } + + bs = bdrv_skip_implicit_filters(blk_bs(blk)); +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (!blk_is_available(blk)) { + error_report("Device '%s' has no medium", device); +- aio_context_release(aio_context); + return; + } + + ret = bdrv_commit(bs); +- +- aio_context_release(aio_context); + } + if (ret < 0) { + error_report("'commit' error for '%s': %s", device, strerror(-ret)); +@@ -560,7 +547,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; + BlockBackend *local_blk = NULL; +- AioContext *ctx = NULL; + bool qdev = qdict_get_try_bool(qdict, "qdev", false); + const char 
*device = qdict_get_str(qdict, "device"); + const char *command = qdict_get_str(qdict, "command"); +@@ -582,9 +568,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + } + } + +- ctx = blk ? blk_get_aio_context(blk) : bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- + if (bs) { + blk = local_blk = blk_new(bdrv_get_aio_context(bs), 0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); +@@ -622,11 +605,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + + fail: + blk_unref(local_blk); +- +- if (ctx) { +- aio_context_release(ctx); +- } +- + hmp_handle_error(mon, err); + } + +@@ -882,7 +860,6 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) + int nb_sns, i; + int total; + int *global_snapshots; +- AioContext *aio_context; + + typedef struct SnapshotEntry { + QEMUSnapshotInfo sn; +@@ -909,11 +886,8 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) + error_report_err(err); + return; + } +- aio_context = bdrv_get_aio_context(bs); + +- aio_context_acquire(aio_context); + nb_sns = bdrv_snapshot_list(bs, &sn_tab); +- aio_context_release(aio_context); + + if (nb_sns < 0) { + monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); +@@ -924,9 +898,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) + int bs1_nb_sns = 0; + ImageEntry *ie; + SnapshotEntry *se; +- AioContext *ctx = bdrv_get_aio_context(bs1); + +- aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs1)) { + sn = NULL; + bs1_nb_sns = bdrv_snapshot_list(bs1, &sn); +@@ -944,7 +916,6 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) + } + g_free(sn); + } +- aio_context_release(ctx); + } + + if (no_snapshot) { +diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c +index 1618cd225a..e4282631d2 100644 +--- a/block/qapi-sysemu.c ++++ b/block/qapi-sysemu.c +@@ -174,7 +174,6 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) + { + BlockBackend *blk; + BlockDriverState *bs; +- AioContext *aio_context; + bool 
has_attached_device; + + GLOBAL_STATE_CODE(); +@@ -204,13 +203,10 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + bdrv_graph_rdlock_main_loop(); + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) { + bdrv_graph_rdunlock_main_loop(); +- goto out; ++ return; + } + bdrv_graph_rdunlock_main_loop(); + +@@ -223,9 +219,6 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) + * value passed here (i.e. false). */ + blk_dev_change_media_cb(blk, false, &error_abort); + } +- +-out: +- aio_context_release(aio_context); + } + + void qmp_blockdev_remove_medium(const char *id, Error **errp) +@@ -237,7 +230,6 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, + BlockDriverState *bs, Error **errp) + { + Error *local_err = NULL; +- AioContext *ctx; + bool has_device; + int ret; + +@@ -259,11 +251,7 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, + return; + } + +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + ret = blk_insert_bs(blk, bs, errp); +- aio_context_release(ctx); +- + if (ret < 0) { + return; + } +@@ -374,9 +362,7 @@ void qmp_blockdev_change_medium(const char *device, + qdict_put_str(options, "driver", format); + } + +- aio_context_acquire(qemu_get_aio_context()); + medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp); +- aio_context_release(qemu_get_aio_context()); + + if (!medium_bs) { + goto fail; +@@ -437,20 +423,16 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) + ThrottleConfig cfg; + BlockDriverState *bs; + BlockBackend *blk; +- AioContext *aio_context; + + blk = qmp_get_blk(arg->device, arg->id, errp); + if (!blk) { + return; + } + +- aio_context = blk_get_aio_context(blk); +- aio_context_acquire(aio_context); +- + bs = blk_bs(blk); + if (!bs) { + error_setg(errp, "Device has no medium"); +- goto out; ++ return; + } + + 
throttle_config_init(&cfg); +@@ -505,7 +487,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) + } + + if (!throttle_is_valid(&cfg, errp)) { +- goto out; ++ return; + } + + if (throttle_enabled(&cfg)) { +@@ -522,9 +504,6 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) + /* If all throttling settings are set to 0, disable I/O limits */ + blk_io_limits_disable(blk); + } +- +-out: +- aio_context_release(aio_context); + } + + void qmp_block_latency_histogram_set( +diff --git a/block/qapi.c b/block/qapi.c +index 82a30b38fe..9e806fa230 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -234,13 +234,11 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) + int ret; + Error *err = NULL; + +- aio_context_acquire(bdrv_get_aio_context(bs)); +- + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "Can't get image size '%s'", + bs->exact_filename); +- goto out; ++ return; + } + + bdrv_refresh_filename(bs); +@@ -265,7 +263,7 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) + info->format_specific = bdrv_get_specific_info(bs, &err); + if (err) { + error_propagate(errp, err); +- goto out; ++ return; + } + backing_filename = bs->backing_file; + if (backing_filename[0] != '\0') { +@@ -300,11 +298,8 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) + break; + default: + error_propagate(errp, err); +- goto out; ++ return; + } +- +-out: +- aio_context_release(bdrv_get_aio_context(bs)); + } + + /** +@@ -709,15 +704,10 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, + /* Just to be safe if query_nodes is not always initialized */ + if (has_query_nodes && query_nodes) { + for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) { +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- aio_context_acquire(ctx); + QAPI_LIST_APPEND(tail, bdrv_query_bds_stats(bs, false)); +- aio_context_release(ctx); + } + } 
else { + for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { +- AioContext *ctx = blk_get_aio_context(blk); + BlockStats *s; + char *qdev; + +@@ -725,7 +715,6 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, + continue; + } + +- aio_context_acquire(ctx); + s = bdrv_query_bds_stats(blk_bs(blk), true); + s->device = g_strdup(blk_name(blk)); + +@@ -737,7 +726,6 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, + } + + bdrv_query_blk_stats(s->stats, blk); +- aio_context_release(ctx); + + QAPI_LIST_APPEND(tail, s); + } +diff --git a/block/raw-format.c b/block/raw-format.c +index 1111dffd54..ac7e8495f6 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -470,7 +470,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) + { + BDRVRawState *s = bs->opaque; +- AioContext *ctx; + bool has_size; + uint64_t offset, size; + BdrvChildRole file_role; +@@ -522,11 +521,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, + bs->file->bs->filename); + } + +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + ret = raw_apply_options(bs, s, offset, has_size, size, errp); +- aio_context_release(ctx); +- + if (ret < 0) { + return ret; + } +diff --git a/block/replication.c b/block/replication.c +index 424b537ff7..ca6bd0a720 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -394,14 +394,7 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, + } + + if (reopen_queue) { +- AioContext *ctx = bdrv_get_aio_context(bs); +- if (ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } + bdrv_reopen_multiple(reopen_queue, errp); +- if (ctx != qemu_get_aio_context()) { +- aio_context_acquire(ctx); +- } + } + } + +@@ -462,14 +455,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + BlockDriverState *top_bs; + BdrvChild *active_disk, *hidden_disk, *secondary_disk; + int64_t active_length, hidden_length, disk_length; 
+- AioContext *aio_context; + Error *local_err = NULL; + BackupPerf perf = { .use_copy_range = true, .max_workers = 1 }; + + GLOBAL_STATE_CODE(); + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || +@@ -479,20 +469,17 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ +- aio_context_release(aio_context); + return; + } + + if (s->stage != BLOCK_REPLICATION_NONE) { + error_setg(errp, "Block replication is running or done"); +- aio_context_release(aio_context); + return; + } + + if (s->mode != mode) { + error_setg(errp, "The parameter mode's value is invalid, needs %d," + " but got %d", s->mode, mode); +- aio_context_release(aio_context); + return; + } + +@@ -505,7 +492,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (!active_disk || !active_disk->bs || !active_disk->bs->backing) { + error_setg(errp, "Active disk doesn't have backing file"); + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + return; + } + +@@ -513,7 +499,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (!hidden_disk->bs || !hidden_disk->bs->backing) { + error_setg(errp, "Hidden disk doesn't have backing file"); + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + return; + } + +@@ -521,7 +506,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) { + error_setg(errp, "The secondary disk doesn't have block backend"); + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + return; + } + bdrv_graph_rdunlock_main_loop(); +@@ -534,7 +518,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + active_length != hidden_length || 
hidden_length != disk_length) { + error_setg(errp, "Active disk, hidden disk, secondary disk's length" + " are not the same"); +- aio_context_release(aio_context); + return; + } + +@@ -546,7 +529,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + !hidden_disk->bs->drv->bdrv_make_empty) { + error_setg(errp, + "Active disk or hidden disk doesn't support make_empty"); +- aio_context_release(aio_context); + bdrv_graph_rdunlock_main_loop(); + return; + } +@@ -556,7 +538,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + reopen_backing_file(bs, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- aio_context_release(aio_context); + return; + } + +@@ -569,7 +550,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (local_err) { + error_propagate(errp, local_err); + bdrv_graph_wrunlock(); +- aio_context_release(aio_context); + return; + } + +@@ -580,7 +560,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (local_err) { + error_propagate(errp, local_err); + bdrv_graph_wrunlock(); +- aio_context_release(aio_context); + return; + } + +@@ -594,7 +573,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + error_setg(errp, "No top_bs or it is invalid"); + bdrv_graph_wrunlock(); + reopen_backing_file(bs, false, NULL); +- aio_context_release(aio_context); + return; + } + bdrv_op_block_all(top_bs, s->blocker); +@@ -612,13 +590,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (local_err) { + error_propagate(errp, local_err); + backup_job_cleanup(bs); +- aio_context_release(aio_context); + return; + } + job_start(&s->backup_job->job); + break; + default: +- aio_context_release(aio_context); + abort(); + } + +@@ -629,18 +605,12 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + } + + s->error = 0; +- aio_context_release(aio_context); + } + + static 
void replication_do_checkpoint(ReplicationState *rs, Error **errp) + { + BlockDriverState *bs = rs->opaque; +- BDRVReplicationState *s; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- s = bs->opaque; ++ BDRVReplicationState *s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || + s->stage == BLOCK_REPLICATION_FAILOVER) { +@@ -649,38 +619,28 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp) + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ +- aio_context_release(aio_context); + return; + } + + if (s->mode == REPLICATION_MODE_SECONDARY) { + secondary_do_checkpoint(bs, errp); + } +- aio_context_release(aio_context); + } + + static void replication_get_error(ReplicationState *rs, Error **errp) + { + BlockDriverState *bs = rs->opaque; +- BDRVReplicationState *s; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- s = bs->opaque; ++ BDRVReplicationState *s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_NONE) { + error_setg(errp, "Block replication is not running"); +- aio_context_release(aio_context); + return; + } + + if (s->error) { + error_setg(errp, "I/O error occurred"); +- aio_context_release(aio_context); + return; + } +- aio_context_release(aio_context); + } + + static void replication_done(void *opaque, int ret) +@@ -708,12 +668,7 @@ static void replication_done(void *opaque, int ret) + static void replication_stop(ReplicationState *rs, bool failover, Error **errp) + { + BlockDriverState *bs = rs->opaque; +- BDRVReplicationState *s; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- s = bs->opaque; ++ BDRVReplicationState *s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || + s->stage == BLOCK_REPLICATION_FAILOVER) { +@@ -722,13 +677,11 @@ static void 
replication_stop(ReplicationState *rs, bool failover, Error **errp) + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ +- aio_context_release(aio_context); + return; + } + + if (s->stage != BLOCK_REPLICATION_RUNNING) { + error_setg(errp, "Block replication is not running"); +- aio_context_release(aio_context); + return; + } + +@@ -744,15 +697,12 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) + * disk, secondary disk in backup_job_completed(). + */ + if (s->backup_job) { +- aio_context_release(aio_context); + job_cancel_sync(&s->backup_job->job, true); +- aio_context_acquire(aio_context); + } + + if (!failover) { + secondary_do_checkpoint(bs, errp); + s->stage = BLOCK_REPLICATION_DONE; +- aio_context_release(aio_context); + return; + } + +@@ -765,10 +715,8 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) + bdrv_graph_rdunlock_main_loop(); + break; + default: +- aio_context_release(aio_context); + abort(); + } +- aio_context_release(aio_context); + } + + static const char *const replication_strong_runtime_opts[] = { +diff --git a/block/snapshot.c b/block/snapshot.c +index e486d3e205..a28f2b039f 100644 +--- a/block/snapshot.c ++++ b/block/snapshot.c +@@ -525,9 +525,7 @@ static bool GRAPH_RDLOCK bdrv_all_snapshots_includes_bs(BlockDriverState *bs) + return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents); + } + +-/* Group operations. All block drivers are involved. +- * These functions will properly handle dataplane (take aio_context_acquire +- * when appropriate for appropriate block drivers) */ ++/* Group operations. All block drivers are involved. 
*/ + + bool bdrv_all_can_snapshot(bool has_devices, strList *devices, + Error **errp) +@@ -545,14 +543,11 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + bool ok = true; + +- aio_context_acquire(ctx); + if (devices || bdrv_all_snapshots_includes_bs(bs)) { + ok = bdrv_can_snapshot(bs); + } +- aio_context_release(ctx); + if (!ok) { + error_setg(errp, "Device '%s' is writable but does not support " + "snapshots", bdrv_get_device_or_node_name(bs)); +@@ -582,18 +577,15 @@ int bdrv_all_delete_snapshot(const char *name, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + QEMUSnapshotInfo sn1, *snapshot = &sn1; + int ret = 0; + +- aio_context_acquire(ctx); + if ((devices || bdrv_all_snapshots_includes_bs(bs)) && + bdrv_snapshot_find(bs, snapshot, name) >= 0) + { + ret = bdrv_snapshot_delete(bs, snapshot->id_str, + snapshot->name, errp); + } +- aio_context_release(ctx); + if (ret < 0) { + error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", + name, bdrv_get_device_or_node_name(bs)); +@@ -628,17 +620,14 @@ int bdrv_all_goto_snapshot(const char *name, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + bool all_snapshots_includes_bs; + +- aio_context_acquire(ctx); + bdrv_graph_rdlock_main_loop(); + all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs); + bdrv_graph_rdunlock_main_loop(); + + ret = (devices || all_snapshots_includes_bs) ? 
+ bdrv_snapshot_goto(bs, name, errp) : 0; +- aio_context_release(ctx); + if (ret < 0) { + bdrv_graph_rdlock_main_loop(); + error_prepend(errp, "Could not load snapshot '%s' on '%s': ", +@@ -670,15 +659,12 @@ int bdrv_all_has_snapshot(const char *name, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + QEMUSnapshotInfo sn; + int ret = 0; + +- aio_context_acquire(ctx); + if (devices || bdrv_all_snapshots_includes_bs(bs)) { + ret = bdrv_snapshot_find(bs, &sn, name); + } +- aio_context_release(ctx); + if (ret < 0) { + if (ret == -ENOENT) { + return 0; +@@ -715,10 +701,8 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + int ret = 0; + +- aio_context_acquire(ctx); + if (bs == vm_state_bs) { + sn->vm_state_size = vm_state_size; + ret = bdrv_snapshot_create(bs, sn); +@@ -726,7 +710,6 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + sn->vm_state_size = 0; + ret = bdrv_snapshot_create(bs, sn); + } +- aio_context_release(ctx); + if (ret < 0) { + error_setg(errp, "Could not create snapshot '%s' on '%s'", + sn->name, bdrv_get_device_or_node_name(bs)); +@@ -757,13 +740,10 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + bool found = false; + +- aio_context_acquire(ctx); + found = (devices || bdrv_all_snapshots_includes_bs(bs)) && + bdrv_can_snapshot(bs); +- aio_context_release(ctx); + + if (vmstate_bs) { + if (g_str_equal(vmstate_bs, +diff --git a/block/write-threshold.c b/block/write-threshold.c +index 76d8885677..56fe88de81 100644 +--- a/block/write-threshold.c ++++ b/block/write-threshold.c +@@ -33,7 +33,6 @@ void qmp_block_set_write_threshold(const char *node_name, + Error **errp) + { + 
BlockDriverState *bs; +- AioContext *aio_context; + + bs = bdrv_find_node(node_name); + if (!bs) { +@@ -41,12 +40,7 @@ void qmp_block_set_write_threshold(const char *node_name, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + bdrv_write_threshold_set(bs, threshold_bytes); +- +- aio_context_release(aio_context); + } + + void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset, +diff --git a/blockdev.c b/blockdev.c +index 9e1381169d..5d8b3a23eb 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -662,7 +662,6 @@ err_no_opts: + /* Takes the ownership of bs_opts */ + BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) + { +- BlockDriverState *bs; + int bdrv_flags = 0; + + GLOBAL_STATE_CODE(); +@@ -677,11 +676,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) + bdrv_flags |= BDRV_O_INACTIVE; + } + +- aio_context_acquire(qemu_get_aio_context()); +- bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); +- aio_context_release(qemu_get_aio_context()); +- +- return bs; ++ return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); + } + + void blockdev_close_all_bdrv_states(void) +@@ -690,11 +685,7 @@ void blockdev_close_all_bdrv_states(void) + + GLOBAL_STATE_CODE(); + QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) { +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- aio_context_acquire(ctx); + bdrv_unref(bs); +- aio_context_release(ctx); + } + } + +@@ -1048,7 +1039,6 @@ fail: + static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) + { + BlockDriverState *bs; +- AioContext *aio_context; + + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +@@ -1062,16 +1052,11 @@ static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) + return NULL; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + if (!bdrv_is_inserted(bs)) { + error_setg(errp, "Device has no medium"); + bs = NULL; + } + +- 
aio_context_release(aio_context); +- + return bs; + } + +@@ -1141,7 +1126,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, + Error **errp) + { + BlockDriverState *bs; +- AioContext *aio_context; + QEMUSnapshotInfo sn; + Error *local_err = NULL; + SnapshotInfo *info = NULL; +@@ -1154,39 +1138,35 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, + if (!bs) { + return NULL; + } +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (!id && !name) { + error_setg(errp, "Name or id must be provided"); +- goto out_aio_context; ++ return NULL; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { +- goto out_aio_context; ++ return NULL; + } + + ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- goto out_aio_context; ++ return NULL; + } + if (!ret) { + error_setg(errp, + "Snapshot with id '%s' and name '%s' does not exist on " + "device '%s'", + STR_OR_NULL(id), STR_OR_NULL(name), device); +- goto out_aio_context; ++ return NULL; + } + + bdrv_snapshot_delete(bs, id, name, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- goto out_aio_context; ++ return NULL; + } + +- aio_context_release(aio_context); +- + info = g_new0(SnapshotInfo, 1); + info->id = g_strdup(sn.id_str); + info->name = g_strdup(sn.name); +@@ -1201,10 +1181,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, + } + + return info; +- +-out_aio_context: +- aio_context_release(aio_context); +- return NULL; + } + + /* internal snapshot private data */ +@@ -1232,7 +1208,6 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, + bool ret; + int64_t rt; + InternalSnapshotState *state = g_new0(InternalSnapshotState, 1); +- AioContext *aio_context; + int ret1; + + GLOBAL_STATE_CODE(); +@@ -1248,33 +1223,30 @@ static void 
internal_snapshot_action(BlockdevSnapshotInternal *internal, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + state->bs = bs; + + /* Paired with .clean() */ + bdrv_drained_begin(bs); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) { +- goto out; ++ return; + } + + if (bdrv_is_read_only(bs)) { + error_setg(errp, "Device '%s' is read only", device); +- goto out; ++ return; + } + + if (!bdrv_can_snapshot(bs)) { + error_setg(errp, "Block format '%s' used by device '%s' " + "does not support internal snapshots", + bs->drv->format_name, device); +- goto out; ++ return; + } + + if (!strlen(name)) { + error_setg(errp, "Name is empty"); +- goto out; ++ return; + } + + /* check whether a snapshot with name exist */ +@@ -1282,12 +1254,12 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, + &local_err); + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } else if (ret) { + error_setg(errp, + "Snapshot with name '%s' already exists on device '%s'", + name, device); +- goto out; ++ return; + } + + /* 3. take the snapshot */ +@@ -1308,14 +1280,11 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, + error_setg_errno(errp, -ret1, + "Failed to create snapshot '%s' on device '%s'", + name, device); +- goto out; ++ return; + } + + /* 4. 
succeed, mark a snapshot is created */ + state->created = true; +- +-out: +- aio_context_release(aio_context); + } + + static void internal_snapshot_abort(void *opaque) +@@ -1323,7 +1292,6 @@ static void internal_snapshot_abort(void *opaque) + InternalSnapshotState *state = opaque; + BlockDriverState *bs = state->bs; + QEMUSnapshotInfo *sn = &state->sn; +- AioContext *aio_context; + Error *local_error = NULL; + + GLOBAL_STATE_CODE(); +@@ -1333,9 +1301,6 @@ static void internal_snapshot_abort(void *opaque) + return; + } + +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); +- + if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { + error_reportf_err(local_error, + "Failed to delete snapshot with id '%s' and " +@@ -1343,25 +1308,17 @@ static void internal_snapshot_abort(void *opaque) + sn->id_str, sn->name, + bdrv_get_device_name(bs)); + } +- +- aio_context_release(aio_context); + } + + static void internal_snapshot_clean(void *opaque) + { + g_autofree InternalSnapshotState *state = opaque; +- AioContext *aio_context; + + if (!state->bs) { + return; + } + +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); +- + bdrv_drained_end(state->bs); +- +- aio_context_release(aio_context); + } + + /* external snapshot private data */ +@@ -1395,7 +1352,6 @@ static void external_snapshot_action(TransactionAction *action, + /* File name of the new image (for 'blockdev-snapshot-sync') */ + const char *new_image_file; + ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); +- AioContext *aio_context; + uint64_t perm, shared; + + /* TODO We'll eventually have to take a writer lock in this function */ +@@ -1435,26 +1391,23 @@ static void external_snapshot_action(TransactionAction *action, + return; + } + +- aio_context = bdrv_get_aio_context(state->old_bs); +- aio_context_acquire(aio_context); +- + /* Paired with .clean() */ + bdrv_drained_begin(state->old_bs); + + if 
(!bdrv_is_inserted(state->old_bs)) { + error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); +- goto out; ++ return; + } + + if (bdrv_op_is_blocked(state->old_bs, + BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) { +- goto out; ++ return; + } + + if (!bdrv_is_read_only(state->old_bs)) { + if (bdrv_flush(state->old_bs)) { + error_setg(errp, QERR_IO_ERROR); +- goto out; ++ return; + } + } + +@@ -1466,13 +1419,13 @@ static void external_snapshot_action(TransactionAction *action, + + if (node_name && !snapshot_node_name) { + error_setg(errp, "New overlay node-name missing"); +- goto out; ++ return; + } + + if (snapshot_node_name && + bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) { + error_setg(errp, "New overlay node-name already in use"); +- goto out; ++ return; + } + + flags = state->old_bs->open_flags; +@@ -1485,20 +1438,18 @@ static void external_snapshot_action(TransactionAction *action, + int64_t size = bdrv_getlength(state->old_bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; ++ return; + } + bdrv_refresh_filename(state->old_bs); + +- aio_context_release(aio_context); + bdrv_img_create(new_image_file, format, + state->old_bs->filename, + state->old_bs->drv->format_name, + NULL, size, flags, false, &local_err); +- aio_context_acquire(aio_context); + + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + } + +@@ -1508,20 +1459,15 @@ static void external_snapshot_action(TransactionAction *action, + } + qdict_put_str(options, "driver", format); + } +- aio_context_release(aio_context); + +- aio_context_acquire(qemu_get_aio_context()); + state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags, + errp); +- aio_context_release(qemu_get_aio_context()); + + /* We will manually add the backing_hd field to the bs later */ + if (!state->new_bs) { + return; + } + +- aio_context_acquire(aio_context); +- + /* + * Allow attaching a backing file to an overlay that's already in use 
only + * if the parents don't assume that they are already seeing a valid image. +@@ -1530,41 +1476,34 @@ static void external_snapshot_action(TransactionAction *action, + bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); + if (perm & BLK_PERM_CONSISTENT_READ) { + error_setg(errp, "The overlay is already in use"); +- goto out; ++ return; + } + + if (state->new_bs->drv->is_filter) { + error_setg(errp, "Filters cannot be used as overlays"); +- goto out; ++ return; + } + + if (bdrv_cow_child(state->new_bs)) { + error_setg(errp, "The overlay already has a backing image"); +- goto out; ++ return; + } + + if (!state->new_bs->drv->supports_backing) { + error_setg(errp, "The overlay does not support backing images"); +- goto out; ++ return; + } + + ret = bdrv_append(state->new_bs, state->old_bs, errp); + if (ret < 0) { +- goto out; ++ return; + } + state->overlay_appended = true; +- +-out: +- aio_context_release(aio_context); + } + + static void external_snapshot_commit(void *opaque) + { + ExternalSnapshotState *state = opaque; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(state->old_bs); +- aio_context_acquire(aio_context); + + /* We don't need (or want) to use the transactional + * bdrv_reopen_multiple() across all the entries at once, because we +@@ -1572,8 +1511,6 @@ static void external_snapshot_commit(void *opaque) + if (!qatomic_read(&state->old_bs->copy_on_read)) { + bdrv_reopen_set_read_only(state->old_bs, true, NULL); + } +- +- aio_context_release(aio_context); + } + + static void external_snapshot_abort(void *opaque) +@@ -1586,7 +1523,6 @@ static void external_snapshot_abort(void *opaque) + int ret; + + aio_context = bdrv_get_aio_context(state->old_bs); +- aio_context_acquire(aio_context); + + bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() + close state->old_bs; we need it */ +@@ -1599,15 +1535,9 @@ static void external_snapshot_abort(void *opaque) + */ + tmp_context = bdrv_get_aio_context(state->old_bs); + if 
(aio_context != tmp_context) { +- aio_context_release(aio_context); +- aio_context_acquire(tmp_context); +- + ret = bdrv_try_change_aio_context(state->old_bs, + aio_context, NULL, NULL); + assert(ret == 0); +- +- aio_context_release(tmp_context); +- aio_context_acquire(aio_context); + } + + bdrv_drained_begin(state->new_bs); +@@ -1617,8 +1547,6 @@ static void external_snapshot_abort(void *opaque) + bdrv_drained_end(state->new_bs); + + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ +- +- aio_context_release(aio_context); + } + } + } +@@ -1626,19 +1554,13 @@ static void external_snapshot_abort(void *opaque) + static void external_snapshot_clean(void *opaque) + { + g_autofree ExternalSnapshotState *state = opaque; +- AioContext *aio_context; + + if (!state->old_bs) { + return; + } + +- aio_context = bdrv_get_aio_context(state->old_bs); +- aio_context_acquire(aio_context); +- + bdrv_drained_end(state->old_bs); + bdrv_unref(state->new_bs); +- +- aio_context_release(aio_context); + } + + typedef struct DriveBackupState { +@@ -1670,7 +1592,6 @@ static void drive_backup_action(DriveBackup *backup, + BlockDriverState *target_bs; + BlockDriverState *source = NULL; + AioContext *aio_context; +- AioContext *old_context; + const char *format; + QDict *options; + Error *local_err = NULL; +@@ -1698,7 +1619,6 @@ static void drive_backup_action(DriveBackup *backup, + } + + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + state->bs = bs; + /* Paired with .clean() */ +@@ -1713,7 +1633,7 @@ static void drive_backup_action(DriveBackup *backup, + bdrv_graph_rdlock_main_loop(); + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { + bdrv_graph_rdunlock_main_loop(); +- goto out; ++ return; + } + + flags = bs->open_flags | BDRV_O_RDWR; +@@ -1744,7 +1664,7 @@ static void drive_backup_action(DriveBackup *backup, + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; 
++ return; + } + + if (backup->mode != NEW_IMAGE_MODE_EXISTING) { +@@ -1770,7 +1690,7 @@ static void drive_backup_action(DriveBackup *backup, + + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + + options = qdict_new(); +@@ -1779,30 +1699,18 @@ static void drive_backup_action(DriveBackup *backup, + if (format) { + qdict_put_str(options, "driver", format); + } +- aio_context_release(aio_context); + +- aio_context_acquire(qemu_get_aio_context()); + target_bs = bdrv_open(backup->target, NULL, options, flags, errp); +- aio_context_release(qemu_get_aio_context()); +- + if (!target_bs) { + return; + } + +- /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ +- old_context = bdrv_get_aio_context(target_bs); +- aio_context_acquire(old_context); +- + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); + if (ret < 0) { + bdrv_unref(target_bs); +- aio_context_release(old_context); + return; + } + +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- + if (set_backing_hd) { + if (bdrv_set_backing_hd(target_bs, source, errp) < 0) { + goto unref; +@@ -1815,22 +1723,14 @@ static void drive_backup_action(DriveBackup *backup, + + unref: + bdrv_unref(target_bs); +-out: +- aio_context_release(aio_context); + } + + static void drive_backup_commit(void *opaque) + { + DriveBackupState *state = opaque; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); + + assert(state->job); + job_start(&state->job->job); +- +- aio_context_release(aio_context); + } + + static void drive_backup_abort(void *opaque) +@@ -1845,18 +1745,12 @@ static void drive_backup_abort(void *opaque) + static void drive_backup_clean(void *opaque) + { + g_autofree DriveBackupState *state = opaque; +- AioContext *aio_context; + + if (!state->bs) { + return; + } + +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); +- + 
bdrv_drained_end(state->bs); +- +- aio_context_release(aio_context); + } + + typedef struct BlockdevBackupState { +@@ -1881,7 +1775,6 @@ static void blockdev_backup_action(BlockdevBackup *backup, + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; +- AioContext *old_context; + int ret; + + tran_add(tran, &blockdev_backup_drv, state); +@@ -1898,17 +1791,12 @@ static void blockdev_backup_action(BlockdevBackup *backup, + + /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ + aio_context = bdrv_get_aio_context(bs); +- old_context = bdrv_get_aio_context(target_bs); +- aio_context_acquire(old_context); + + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); + if (ret < 0) { +- aio_context_release(old_context); + return; + } + +- aio_context_release(old_context); +- aio_context_acquire(aio_context); + state->bs = bs; + + /* Paired with .clean() */ +@@ -1917,22 +1805,14 @@ static void blockdev_backup_action(BlockdevBackup *backup, + state->job = do_backup_common(qapi_BlockdevBackup_base(backup), + bs, target_bs, aio_context, + block_job_txn, errp); +- +- aio_context_release(aio_context); + } + + static void blockdev_backup_commit(void *opaque) + { + BlockdevBackupState *state = opaque; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); + + assert(state->job); + job_start(&state->job->job); +- +- aio_context_release(aio_context); + } + + static void blockdev_backup_abort(void *opaque) +@@ -1947,18 +1827,12 @@ static void blockdev_backup_abort(void *opaque) + static void blockdev_backup_clean(void *opaque) + { + g_autofree BlockdevBackupState *state = opaque; +- AioContext *aio_context; + + if (!state->bs) { + return; + } + +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); +- + bdrv_drained_end(state->bs); +- +- aio_context_release(aio_context); + } + + typedef struct BlockDirtyBitmapState { +@@ 
-2454,7 +2328,6 @@ void qmp_block_stream(const char *job_id, const char *device, + } + + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + bdrv_graph_rdlock_main_loop(); + if (base) { +@@ -2521,7 +2394,7 @@ void qmp_block_stream(const char *job_id, const char *device, + if (!base_bs && backing_file) { + error_setg(errp, "backing file specified, but streaming the " + "entire chain"); +- goto out; ++ return; + } + + if (has_auto_finalize && !auto_finalize) { +@@ -2536,18 +2409,14 @@ void qmp_block_stream(const char *job_id, const char *device, + filter_node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + + trace_qmp_block_stream(bs); +- +-out: +- aio_context_release(aio_context); + return; + + out_rdlock: + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + } + + void qmp_block_commit(const char *job_id, const char *device, +@@ -2606,10 +2475,9 @@ void qmp_block_commit(const char *job_id, const char *device, + } + + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) { +- goto out; ++ return; + } + + /* default top_bs is the active layer */ +@@ -2617,16 +2485,16 @@ void qmp_block_commit(const char *job_id, const char *device, + + if (top_node && top) { + error_setg(errp, "'top-node' and 'top' are mutually exclusive"); +- goto out; ++ return; + } else if (top_node) { + top_bs = bdrv_lookup_bs(NULL, top_node, errp); + if (top_bs == NULL) { +- goto out; ++ return; + } + if (!bdrv_chain_contains(bs, top_bs)) { + error_setg(errp, "'%s' is not in this backing file chain", + top_node); +- goto out; ++ return; + } + } else if (top) { + /* This strcmp() is just a shortcut, there is no need to +@@ -2640,35 +2508,35 @@ void qmp_block_commit(const char *job_id, const char *device, + + if (top_bs == NULL) { + error_setg(errp, "Top image file %s not found", top ? 
top : "NULL"); +- goto out; ++ return; + } + + assert(bdrv_get_aio_context(top_bs) == aio_context); + + if (base_node && base) { + error_setg(errp, "'base-node' and 'base' are mutually exclusive"); +- goto out; ++ return; + } else if (base_node) { + base_bs = bdrv_lookup_bs(NULL, base_node, errp); + if (base_bs == NULL) { +- goto out; ++ return; + } + if (!bdrv_chain_contains(top_bs, base_bs)) { + error_setg(errp, "'%s' is not in this backing file chain", + base_node); +- goto out; ++ return; + } + } else if (base) { + base_bs = bdrv_find_backing_image(top_bs, base); + if (base_bs == NULL) { + error_setg(errp, "Can't find '%s' in the backing chain", base); +- goto out; ++ return; + } + } else { + base_bs = bdrv_find_base(top_bs); + if (base_bs == NULL) { + error_setg(errp, "There is no backimg image"); +- goto out; ++ return; + } + } + +@@ -2678,14 +2546,14 @@ void qmp_block_commit(const char *job_id, const char *device, + iter = bdrv_filter_or_cow_bs(iter)) + { + if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { +- goto out; ++ return; + } + } + + /* Do not allow attempts to commit an image into itself */ + if (top_bs == base_bs) { + error_setg(errp, "cannot commit an image into itself"); +- goto out; ++ return; + } + + /* +@@ -2708,7 +2576,7 @@ void qmp_block_commit(const char *job_id, const char *device, + error_setg(errp, "'backing-file' specified, but 'top' has a " + "writer on it"); + } +- goto out; ++ return; + } + if (!job_id) { + /* +@@ -2724,7 +2592,7 @@ void qmp_block_commit(const char *job_id, const char *device, + } else { + BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); + if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { +- goto out; ++ return; + } + commit_start(job_id, bs, base_bs, top_bs, job_flags, + speed, on_error, backing_file, +@@ -2732,11 +2600,8 @@ void qmp_block_commit(const char *job_id, const char *device, + } + if (local_err != NULL) { + error_propagate(errp, local_err); +- goto out; 
++ return; + } +- +-out: +- aio_context_release(aio_context); + } + + /* Common QMP interface for drive-backup and blockdev-backup */ +@@ -2985,8 +2850,6 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + + if (replaces) { + BlockDriverState *to_replace_bs; +- AioContext *aio_context; +- AioContext *replace_aio_context; + int64_t bs_size, replace_size; + + bs_size = bdrv_getlength(bs); +@@ -3000,19 +2863,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- replace_aio_context = bdrv_get_aio_context(to_replace_bs); +- /* +- * bdrv_getlength() is a co-wrapper and uses AIO_WAIT_WHILE. Be sure not +- * to acquire the same AioContext twice. +- */ +- if (replace_aio_context != aio_context) { +- aio_context_acquire(replace_aio_context); +- } + replace_size = bdrv_getlength(to_replace_bs); +- if (replace_aio_context != aio_context) { +- aio_context_release(replace_aio_context); +- } + + if (replace_size < 0) { + error_setg_errno(errp, -replace_size, +@@ -3041,7 +2892,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + BlockDriverState *bs; + BlockDriverState *target_backing_bs, *target_bs; + AioContext *aio_context; +- AioContext *old_context; + BlockMirrorBackingMode backing_mode; + Error *local_err = NULL; + QDict *options = NULL; +@@ -3064,7 +2914,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + } + + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (!arg->has_mode) { + arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; +@@ -3088,14 +2937,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; ++ return; + } + + if (arg->replaces) { + if (!arg->node_name) { + error_setg(errp, "a node-name must be provided when replacing a" + " named node of the graph"); +- goto out; ++ return; + } + 
} + +@@ -3143,7 +2992,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + + options = qdict_new(); +@@ -3153,15 +3002,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + if (format) { + qdict_put_str(options, "driver", format); + } +- aio_context_release(aio_context); + + /* Mirroring takes care of copy-on-write using the source's backing + * file. + */ +- aio_context_acquire(qemu_get_aio_context()); + target_bs = bdrv_open(arg->target, NULL, options, flags, errp); +- aio_context_release(qemu_get_aio_context()); +- + if (!target_bs) { + return; + } +@@ -3173,20 +3018,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + bdrv_graph_rdunlock_main_loop(); + + +- /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ +- old_context = bdrv_get_aio_context(target_bs); +- aio_context_acquire(old_context); +- + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); + if (ret < 0) { + bdrv_unref(target_bs); +- aio_context_release(old_context); + return; + } + +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- + blockdev_mirror_common(arg->job_id, bs, target_bs, + arg->replaces, arg->sync, + backing_mode, zero_target, +@@ -3202,8 +3039,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + arg->has_auto_dismiss, arg->auto_dismiss, + errp); + bdrv_unref(target_bs); +-out: +- aio_context_release(aio_context); + } + + void qmp_blockdev_mirror(const char *job_id, +@@ -3226,7 +3061,6 @@ void qmp_blockdev_mirror(const char *job_id, + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; +- AioContext *old_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; + bool zero_target; + int ret; +@@ -3243,18 +3077,11 @@ void qmp_blockdev_mirror(const char *job_id, + + zero_target = (sync == MIRROR_SYNC_MODE_FULL); + +- /* Honor bdrv_try_change_aio_context() 
context acquisition requirements. */ +- old_context = bdrv_get_aio_context(target_bs); + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(old_context); + + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); +- +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- + if (ret < 0) { +- goto out; ++ return; + } + + blockdev_mirror_common(job_id, bs, target_bs, +@@ -3269,8 +3096,6 @@ void qmp_blockdev_mirror(const char *job_id, + has_auto_finalize, auto_finalize, + has_auto_dismiss, auto_dismiss, + errp); +-out: +- aio_context_release(aio_context); + } + + /* +@@ -3433,7 +3258,6 @@ void qmp_change_backing_file(const char *device, + Error **errp) + { + BlockDriverState *bs = NULL; +- AioContext *aio_context; + BlockDriverState *image_bs = NULL; + Error *local_err = NULL; + bool ro; +@@ -3444,9 +3268,6 @@ void qmp_change_backing_file(const char *device, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + bdrv_graph_rdlock_main_loop(); + + image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err); +@@ -3485,7 +3306,7 @@ void qmp_change_backing_file(const char *device, + + if (ro) { + if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) { +- goto out; ++ return; + } + } + +@@ -3503,14 +3324,10 @@ void qmp_change_backing_file(const char *device, + if (ro) { + bdrv_reopen_set_read_only(image_bs, true, errp); + } +- +-out: +- aio_context_release(aio_context); + return; + + out_rdlock: + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + } + + void qmp_blockdev_add(BlockdevOptions *options, Error **errp) +@@ -3550,7 +3367,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + for (; reopen_list != NULL; reopen_list = reopen_list->next) { + BlockdevOptions *options = reopen_list->value; + BlockDriverState *bs; +- AioContext *ctx; + QObject *obj; + Visitor *v; + QDict *qdict; +@@ -3578,12 +3394,7 @@ void 
qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + qdict_flatten(qdict); + +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- + queue = bdrv_reopen_queue(queue, bs, qdict, false); +- +- aio_context_release(ctx); + } + + /* Perform the reopen operation */ +@@ -3596,7 +3407,6 @@ fail: + + void qmp_blockdev_del(const char *node_name, Error **errp) + { +- AioContext *aio_context; + BlockDriverState *bs; + + GLOBAL_STATE_CODE(); +@@ -3611,30 +3421,25 @@ void qmp_blockdev_del(const char *node_name, Error **errp) + error_setg(errp, "Node %s is in use", node_name); + return; + } +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) { +- goto out; ++ return; + } + + if (!QTAILQ_IN_USE(bs, monitor_list)) { + error_setg(errp, "Node %s is not owned by the monitor", + bs->node_name); +- goto out; ++ return; + } + + if (bs->refcnt > 1) { + error_setg(errp, "Block device %s is in use", + bdrv_get_device_or_node_name(bs)); +- goto out; ++ return; + } + + QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); + bdrv_unref(bs); +- +-out: +- aio_context_release(aio_context); + } + + static BdrvChild * GRAPH_RDLOCK +@@ -3724,7 +3529,6 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp) + void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, + bool has_force, bool force, Error **errp) + { +- AioContext *old_context; + AioContext *new_context; + BlockDriverState *bs; + +@@ -3756,12 +3560,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, + new_context = qemu_get_aio_context(); + } + +- old_context = bdrv_get_aio_context(bs); +- aio_context_acquire(old_context); +- + bdrv_try_change_aio_context(bs, new_context, NULL, errp); +- +- aio_context_release(old_context); + } + + QemuOptsList qemu_common_drive_opts = { +diff --git a/blockjob.c b/blockjob.c +index 7310412313..d5f29e14af 100644 +--- a/blockjob.c ++++ 
b/blockjob.c +@@ -198,9 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) + * one to make sure that such a concurrent access does not attempt + * to process an already freed BdrvChild. + */ +- aio_context_release(job->job.aio_context); + bdrv_graph_wrlock(); +- aio_context_acquire(job->job.aio_context); + while (job->nodes) { + GSList *l = job->nodes; + BdrvChild *c = l->data; +@@ -234,28 +232,12 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) + { + BdrvChild *c; +- AioContext *ctx = bdrv_get_aio_context(bs); +- bool need_context_ops; + GLOBAL_STATE_CODE(); + + bdrv_ref(bs); + +- need_context_ops = ctx != job->job.aio_context; +- +- if (need_context_ops) { +- if (job->job.aio_context != qemu_get_aio_context()) { +- aio_context_release(job->job.aio_context); +- } +- aio_context_acquire(ctx); +- } + c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job, + errp); +- if (need_context_ops) { +- aio_context_release(ctx); +- if (job->job.aio_context != qemu_get_aio_context()) { +- aio_context_acquire(job->job.aio_context); +- } +- } + if (c == NULL) { + return -EPERM; + } +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index f83bb0f116..7bbbd981ad 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -124,7 +124,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + VirtIOBlockDataPlane *s = vblk->dataplane; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- AioContext *old_context; + unsigned i; + unsigned nvqs = s->conf->num_queues; + Error *local_err = NULL; +@@ -178,10 +177,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + + trace_virtio_blk_data_plane_start(s); + +- old_context = blk_get_aio_context(s->conf->conf.blk); +- aio_context_acquire(old_context); + r = blk_set_aio_context(s->conf->conf.blk, s->ctx, 
&local_err); +- aio_context_release(old_context); + if (r < 0) { + error_report_err(local_err); + goto fail_aio_context; +@@ -208,13 +204,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + + /* Get this show started by hooking up our callbacks */ + if (!blk_in_drain(s->conf->conf.blk)) { +- aio_context_acquire(s->ctx); + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); + + virtio_queue_aio_attach_host_notifier(vq, s->ctx); + } +- aio_context_release(s->ctx); + } + return 0; + +@@ -314,8 +308,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + */ + vblk->dataplane_started = false; + +- aio_context_acquire(s->ctx); +- + /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ + blk_drain(s->conf->conf.blk); + +@@ -325,8 +317,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + */ + blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); + +- aio_context_release(s->ctx); +- + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, nvqs, false); + +diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c +index c4bb28c66f..98501e6885 100644 +--- a/hw/block/dataplane/xen-block.c ++++ b/hw/block/dataplane/xen-block.c +@@ -260,8 +260,6 @@ static void xen_block_complete_aio(void *opaque, int ret) + XenBlockRequest *request = opaque; + XenBlockDataPlane *dataplane = request->dataplane; + +- aio_context_acquire(dataplane->ctx); +- + if (ret != 0) { + error_report("%s I/O error", + request->req.operation == BLKIF_OP_READ ? 
+@@ -273,10 +271,10 @@ static void xen_block_complete_aio(void *opaque, int ret) + if (request->presync) { + request->presync = 0; + xen_block_do_aio(request); +- goto done; ++ return; + } + if (request->aio_inflight > 0) { +- goto done; ++ return; + } + + switch (request->req.operation) { +@@ -318,9 +316,6 @@ static void xen_block_complete_aio(void *opaque, int ret) + if (dataplane->more_work) { + qemu_bh_schedule(dataplane->bh); + } +- +-done: +- aio_context_release(dataplane->ctx); + } + + static bool xen_block_split_discard(XenBlockRequest *request, +@@ -601,9 +596,7 @@ static void xen_block_dataplane_bh(void *opaque) + { + XenBlockDataPlane *dataplane = opaque; + +- aio_context_acquire(dataplane->ctx); + xen_block_handle_requests(dataplane); +- aio_context_release(dataplane->ctx); + } + + static bool xen_block_dataplane_event(void *opaque) +@@ -703,10 +696,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) + xen_block_dataplane_detach(dataplane); + } + +- aio_context_acquire(dataplane->ctx); + /* Xen doesn't have multiple users for nodes, so this can't fail */ + blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(dataplane->ctx); + + /* + * Now that the context has been moved onto the main thread, cancel +@@ -752,7 +743,6 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, + { + ERRP_GUARD(); + XenDevice *xendev = dataplane->xendev; +- AioContext *old_context; + unsigned int ring_size; + unsigned int i; + +@@ -836,11 +826,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, + goto stop; + } + +- old_context = blk_get_aio_context(dataplane->blk); +- aio_context_acquire(old_context); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); +- aio_context_release(old_context); + + if (!blk_in_drain(dataplane->blk)) { + xen_block_dataplane_attach(dataplane); +diff --git a/hw/block/virtio-blk.c 
b/hw/block/virtio-blk.c +index e110f9718b..ec9ed09a6a 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1210,17 +1210,13 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, + static void virtio_blk_reset(VirtIODevice *vdev) + { + VirtIOBlock *s = VIRTIO_BLK(vdev); +- AioContext *ctx; + VirtIOBlockReq *req; + + /* Dataplane has stopped... */ + assert(!s->dataplane_started); + + /* ...but requests may still be in flight. */ +- ctx = blk_get_aio_context(s->blk); +- aio_context_acquire(ctx); + blk_drain(s->blk); +- aio_context_release(ctx); + + /* We drop queued requests after blk_drain() because blk_drain() itself can + * produce them. */ +@@ -1250,10 +1246,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) + uint64_t capacity; + int64_t length; + int blk_size = conf->logical_block_size; +- AioContext *ctx; +- +- ctx = blk_get_aio_context(s->blk); +- aio_context_acquire(ctx); + + blk_get_geometry(s->blk, &capacity); + memset(&blkcfg, 0, sizeof(blkcfg)); +@@ -1277,7 +1269,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) + * per track (cylinder). + */ + length = blk_getlength(s->blk); +- aio_context_release(ctx); + if (length > 0 && length / conf->heads / conf->secs % blk_size) { + blkcfg.geometry.sectors = conf->secs & ~s->sector_mask; + } else { +@@ -1344,9 +1335,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) + + memcpy(&blkcfg, config, s->config_size); + +- aio_context_acquire(blk_get_aio_context(s->blk)); + blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); +- aio_context_release(blk_get_aio_context(s->blk)); + } + + static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, +@@ -1414,11 +1403,9 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) + * s->blk would erroneously be placed in writethrough mode. 
+ */ + if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { +- aio_context_acquire(blk_get_aio_context(s->blk)); + blk_set_enable_write_cache(s->blk, + virtio_vdev_has_feature(vdev, + VIRTIO_BLK_F_WCE)); +- aio_context_release(blk_get_aio_context(s->blk)); + } + } + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 1473ab3d5e..73cced4626 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -120,9 +120,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, + "node"); + } + +- aio_context_acquire(ctx); + blk_replace_bs(blk, bs, errp); +- aio_context_release(ctx); + return; + } + +@@ -148,10 +146,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, + 0, BLK_PERM_ALL); + blk_created = true; + +- aio_context_acquire(ctx); + ret = blk_insert_bs(blk, bs, errp); +- aio_context_release(ctx); +- + if (ret < 0) { + goto fail; + } +@@ -207,12 +202,8 @@ static void release_drive(Object *obj, const char *name, void *opaque) + BlockBackend **ptr = object_field_prop_ptr(obj, prop); + + if (*ptr) { +- AioContext *ctx = blk_get_aio_context(*ptr); +- +- aio_context_acquire(ctx); + blockdev_auto_del(*ptr); + blk_detach_dev(*ptr, dev); +- aio_context_release(ctx); + } + } + +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index 6b21fbc73f..0327f1c605 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -31,11 +31,10 @@ + /* + * Global state (GS) API. These functions run under the BQL. + * +- * If a function modifies the graph, it also uses drain and/or +- * aio_context_acquire/release to be sure it has unique access. +- * aio_context locking is needed together with BQL because of +- * the thread-safe I/O API that concurrently runs and accesses +- * the graph without the BQL. 
++ * If a function modifies the graph, it also uses the graph lock to be sure it ++ * has unique access. The graph lock is needed together with BQL because of the ++ * thread-safe I/O API that concurrently runs and accesses the graph without ++ * the BQL. + * + * It is important to note that not all of these functions are + * necessarily limited to running under the BQL, but they would +diff --git a/include/block/block-io.h b/include/block/block-io.h +index f8729ccc55..8eb39a858b 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -31,8 +31,7 @@ + + /* + * I/O API functions. These functions are thread-safe, and therefore +- * can run in any thread as long as the thread has called +- * aio_context_acquire/release(). ++ * can run in any thread. + * + * These functions can only call functions from I/O and Common categories, + * but can be invoked by GS, "I/O or GS" and I/O APIs. +diff --git a/include/block/snapshot.h b/include/block/snapshot.h +index d49c5599d9..304cc6ea61 100644 +--- a/include/block/snapshot.h ++++ b/include/block/snapshot.h +@@ -86,8 +86,6 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, + + /* + * Group operations. All block drivers are involved. +- * These functions will properly handle dataplane (take aio_context_acquire +- * when appropriate for appropriate block drivers + */ + + bool bdrv_all_can_snapshot(bool has_devices, strList *devices, +diff --git a/job.c b/job.c +index 99a2e54b54..660ce22c56 100644 +--- a/job.c ++++ b/job.c +@@ -464,12 +464,8 @@ void job_unref_locked(Job *job) + assert(!job->txn); + + if (job->driver->free) { +- AioContext *aio_context = job->aio_context; + job_unlock(); +- /* FIXME: aiocontext lock is required because cb calls blk_unref */ +- aio_context_acquire(aio_context); + job->driver->free(job); +- aio_context_release(aio_context); + job_lock(); + } + +@@ -840,12 +836,10 @@ static void job_clean(Job *job) + + /* + * Called with job_mutex held, but releases it temporarily. 
+- * Takes AioContext lock internally to invoke a job->driver callback. + */ + static int job_finalize_single_locked(Job *job) + { + int job_ret; +- AioContext *ctx = job->aio_context; + + assert(job_is_completed_locked(job)); + +@@ -854,7 +848,6 @@ static int job_finalize_single_locked(Job *job) + + job_ret = job->ret; + job_unlock(); +- aio_context_acquire(ctx); + + if (!job_ret) { + job_commit(job); +@@ -867,7 +860,6 @@ static int job_finalize_single_locked(Job *job) + job->cb(job->opaque, job_ret); + } + +- aio_context_release(ctx); + job_lock(); + + /* Emit events only if we actually started */ +@@ -886,17 +878,13 @@ static int job_finalize_single_locked(Job *job) + + /* + * Called with job_mutex held, but releases it temporarily. +- * Takes AioContext lock internally to invoke a job->driver callback. + */ + static void job_cancel_async_locked(Job *job, bool force) + { +- AioContext *ctx = job->aio_context; + GLOBAL_STATE_CODE(); + if (job->driver->cancel) { + job_unlock(); +- aio_context_acquire(ctx); + force = job->driver->cancel(job, force); +- aio_context_release(ctx); + job_lock(); + } else { + /* No .cancel() means the job will behave as if force-cancelled */ +@@ -931,7 +919,6 @@ static void job_cancel_async_locked(Job *job, bool force) + + /* + * Called with job_mutex held, but releases it temporarily. +- * Takes AioContext lock internally to invoke a job->driver callback. 
+ */ + static void job_completed_txn_abort_locked(Job *job) + { +@@ -979,15 +966,12 @@ static void job_completed_txn_abort_locked(Job *job) + static int job_prepare_locked(Job *job) + { + int ret; +- AioContext *ctx = job->aio_context; + + GLOBAL_STATE_CODE(); + + if (job->ret == 0 && job->driver->prepare) { + job_unlock(); +- aio_context_acquire(ctx); + ret = job->driver->prepare(job); +- aio_context_release(ctx); + job_lock(); + job->ret = ret; + job_update_rc_locked(job); +diff --git a/migration/block.c b/migration/block.c +index a15f9bddcb..6ec6a1d6e6 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -66,7 +66,7 @@ typedef struct BlkMigDevState { + /* Protected by block migration lock. */ + int64_t completed_sectors; + +- /* During migration this is protected by iothread lock / AioContext. ++ /* During migration this is protected by bdrv_dirty_bitmap_lock(). + * Allocation and free happen during setup and cleanup respectively. + */ + BdrvDirtyBitmap *dirty_bitmap; +@@ -101,7 +101,7 @@ typedef struct BlkMigState { + int prev_progress; + int bulk_completed; + +- /* Lock must be taken _inside_ the iothread lock and any AioContexts. */ ++ /* Lock must be taken _inside_ the iothread lock. 
*/ + QemuMutex lock; + } BlkMigState; + +@@ -270,7 +270,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) + + if (bmds->shared_base) { + qemu_mutex_lock_iothread(); +- aio_context_acquire(blk_get_aio_context(bb)); + /* Skip unallocated sectors; intentionally treats failure or + * partial sector as an allocated sector */ + while (cur_sector < total_sectors && +@@ -281,7 +280,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) + } + cur_sector += count >> BDRV_SECTOR_BITS; + } +- aio_context_release(blk_get_aio_context(bb)); + qemu_mutex_unlock_iothread(); + } + +@@ -313,21 +311,16 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) + block_mig_state.submitted++; + blk_mig_unlock(); + +- /* We do not know if bs is under the main thread (and thus does +- * not acquire the AioContext when doing AIO) or rather under +- * dataplane. Thus acquire both the iothread mutex and the +- * AioContext. +- * +- * This is ugly and will disappear when we make bdrv_* thread-safe, +- * without the need to acquire the AioContext. ++ /* ++ * The migration thread does not have an AioContext. Lock the BQL so that ++ * I/O runs in the main loop AioContext (see ++ * qemu_get_current_aio_context()). + */ + qemu_mutex_lock_iothread(); +- aio_context_acquire(blk_get_aio_context(bmds->blk)); + bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector * BDRV_SECTOR_SIZE, + nr_sectors * BDRV_SECTOR_SIZE); + blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov, + 0, blk_mig_read_cb, blk); +- aio_context_release(blk_get_aio_context(bmds->blk)); + qemu_mutex_unlock_iothread(); + + bmds->cur_sector = cur_sector + nr_sectors; +@@ -512,7 +505,7 @@ static void blk_mig_reset_dirty_cursor(void) + } + } + +-/* Called with iothread lock and AioContext taken. */ ++/* Called with iothread lock taken. 
*/ + + static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, + int is_async) +@@ -606,9 +599,7 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async) + int ret = 1; + + QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { +- aio_context_acquire(blk_get_aio_context(bmds->blk)); + ret = mig_save_device_dirty(f, bmds, is_async); +- aio_context_release(blk_get_aio_context(bmds->blk)); + if (ret <= 0) { + break; + } +@@ -666,9 +657,9 @@ static int64_t get_remaining_dirty(void) + int64_t dirty = 0; + + QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { +- aio_context_acquire(blk_get_aio_context(bmds->blk)); ++ bdrv_dirty_bitmap_lock(bmds->dirty_bitmap); + dirty += bdrv_get_dirty_count(bmds->dirty_bitmap); +- aio_context_release(blk_get_aio_context(bmds->blk)); ++ bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap); + } + + return dirty; +@@ -681,7 +672,6 @@ static void block_migration_cleanup_bmds(void) + { + BlkMigDevState *bmds; + BlockDriverState *bs; +- AioContext *ctx; + + unset_dirty_tracking(); + +@@ -693,13 +683,7 @@ static void block_migration_cleanup_bmds(void) + bdrv_op_unblock_all(bs, bmds->blocker); + } + error_free(bmds->blocker); +- +- /* Save ctx, because bmds->blk can disappear during blk_unref. 
*/ +- ctx = blk_get_aio_context(bmds->blk); +- aio_context_acquire(ctx); + blk_unref(bmds->blk); +- aio_context_release(ctx); +- + g_free(bmds->blk_name); + g_free(bmds->aio_bitmap); + g_free(bmds); +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 86ae832176..99710c8ffb 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -852,14 +852,11 @@ static void vm_completion(ReadLineState *rs, const char *str) + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + SnapshotInfoList *snapshots, *snapshot; +- AioContext *ctx = bdrv_get_aio_context(bs); + bool ok = false; + +- aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + ok = bdrv_query_snapshot_info_list(bs, &snapshots, NULL) == 0; + } +- aio_context_release(ctx); + if (!ok) { + continue; + } +diff --git a/migration/savevm.c b/migration/savevm.c +index eec5503a42..1b9ab7b8ee 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -3049,7 +3049,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + int saved_vm_running; + uint64_t vm_state_size; + g_autoptr(GDateTime) now = g_date_time_new_now_local(); +- AioContext *aio_context; + + GLOBAL_STATE_CODE(); + +@@ -3092,7 +3091,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + if (bs == NULL) { + return false; + } +- aio_context = bdrv_get_aio_context(bs); + + saved_vm_running = runstate_is_running(); + +@@ -3101,8 +3099,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + + bdrv_drain_all_begin(); + +- aio_context_acquire(aio_context); +- + memset(sn, 0, sizeof(*sn)); + + /* fill auxiliary fields */ +@@ -3139,14 +3135,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + goto the_end; + } + +- /* The bdrv_all_create_snapshot() call that follows acquires the AioContext +- * for itself. 
BDRV_POLL_WHILE() does not support nested locking because +- * it only releases the lock once. Therefore synchronous I/O will deadlock +- * unless we release the AioContext before bdrv_all_create_snapshot(). +- */ +- aio_context_release(aio_context); +- aio_context = NULL; +- + ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, + has_devices, devices, errp); + if (ret < 0) { +@@ -3157,10 +3145,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + ret = 0; + + the_end: +- if (aio_context) { +- aio_context_release(aio_context); +- } +- + bdrv_drain_all_end(); + + if (saved_vm_running) { +@@ -3258,7 +3242,6 @@ bool load_snapshot(const char *name, const char *vmstate, + QEMUSnapshotInfo sn; + QEMUFile *f; + int ret; +- AioContext *aio_context; + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (!bdrv_all_can_snapshot(has_devices, devices, errp)) { +@@ -3278,12 +3261,9 @@ bool load_snapshot(const char *name, const char *vmstate, + if (!bs_vm_state) { + return false; + } +- aio_context = bdrv_get_aio_context(bs_vm_state); + + /* Don't even try to load empty VM states */ +- aio_context_acquire(aio_context); + ret = bdrv_snapshot_find(bs_vm_state, &sn, name); +- aio_context_release(aio_context); + if (ret < 0) { + return false; + } else if (sn.vm_state_size == 0) { +@@ -3320,10 +3300,8 @@ bool load_snapshot(const char *name, const char *vmstate, + ret = -EINVAL; + goto err_drain; + } +- aio_context_acquire(aio_context); + ret = qemu_loadvm_state(f); + migration_incoming_state_destroy(); +- aio_context_release(aio_context); + + bdrv_drain_all_end(); + +diff --git a/net/colo-compare.c b/net/colo-compare.c +index 7f9e6f89ce..f2dfc0ebdc 100644 +--- a/net/colo-compare.c ++++ b/net/colo-compare.c +@@ -1439,12 +1439,10 @@ static void colo_compare_finalize(Object *obj) + qemu_bh_delete(s->event_bh); + + AioContext *ctx = iothread_get_aio_context(s->iothread); +- aio_context_acquire(ctx); + AIO_WAIT_WHILE(ctx, 
!s->out_sendco.done); + if (s->notify_dev) { + AIO_WAIT_WHILE(ctx, !s->notify_sendco.done); + } +- aio_context_release(ctx); + + /* Release all unhandled packets after compare thead exited */ + g_queue_foreach(&s->conn_list, colo_flush_packets, s); +diff --git a/qemu-img.c b/qemu-img.c +index 5a77f67719..7668f86769 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -960,7 +960,6 @@ static int img_commit(int argc, char **argv) + Error *local_err = NULL; + CommonBlockJobCBInfo cbi; + bool image_opts = false; +- AioContext *aio_context; + int64_t rate_limit = 0; + + fmt = NULL; +@@ -1078,12 +1077,9 @@ static int img_commit(int argc, char **argv) + .bs = bs, + }; + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit, + BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, + &cbi, false, &local_err); +- aio_context_release(aio_context); + if (local_err) { + goto done; + } +diff --git a/qemu-io.c b/qemu-io.c +index 050c70835f..6cb1e00385 100644 +--- a/qemu-io.c ++++ b/qemu-io.c +@@ -414,15 +414,7 @@ static void prep_fetchline(void *opaque) + + static int do_qemuio_command(const char *cmd) + { +- int ret; +- AioContext *ctx = +- qemuio_blk ? 
blk_get_aio_context(qemuio_blk) : qemu_get_aio_context(); +- +- aio_context_acquire(ctx); +- ret = qemuio_command(qemuio_blk, cmd); +- aio_context_release(ctx); +- +- return ret; ++ return qemuio_command(qemuio_blk, cmd); + } + + static int command_loop(void) +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 186e6468b1..bac0b5e3ec 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -1123,9 +1123,7 @@ int main(int argc, char **argv) + qdict_put_str(raw_opts, "file", bs->node_name); + qdict_put_int(raw_opts, "offset", dev_offset); + +- aio_context_acquire(qemu_get_aio_context()); + bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal); +- aio_context_release(qemu_get_aio_context()); + + blk_remove_bs(blk); + blk_insert_bs(blk, bs, &error_fatal); +diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c +index 3e60549a4a..82c66fff26 100644 +--- a/replay/replay-debugging.c ++++ b/replay/replay-debugging.c +@@ -144,7 +144,6 @@ static char *replay_find_nearest_snapshot(int64_t icount, + char *ret = NULL; + int rv; + int nb_sns, i; +- AioContext *aio_context; + + *snapshot_icount = -1; + +@@ -152,11 +151,8 @@ static char *replay_find_nearest_snapshot(int64_t icount, + if (!bs) { + goto fail; + } +- aio_context = bdrv_get_aio_context(bs); + +- aio_context_acquire(aio_context); + nb_sns = bdrv_snapshot_list(bs, &sn_tab); +- aio_context_release(aio_context); + + for (i = 0; i < nb_sns; i++) { + rv = bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL); +diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py +index 38364fa557..c9c09fcacd 100644 +--- a/scripts/block-coroutine-wrapper.py ++++ b/scripts/block-coroutine-wrapper.py +@@ -278,12 +278,9 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: + static void {name}_bh(void *opaque) + {{ + {struct_name} *s = opaque; +- AioContext *ctx = {func.gen_ctx('s->')}; + + {graph_lock} +- aio_context_acquire(ctx); + {func.get_result}{name}({ func.gen_list('s->{name}') }); +- aio_context_release(ctx); 
+ {graph_unlock} + + aio_co_wake(s->co); +diff --git a/tests/tsan/suppressions.tsan b/tests/tsan/suppressions.tsan +index d9a002a2ef..b3ef59c27c 100644 +--- a/tests/tsan/suppressions.tsan ++++ b/tests/tsan/suppressions.tsan +@@ -4,7 +4,6 @@ + + # TSan reports a double lock on RECURSIVE mutexes. + # Since the recursive lock is intentional, we choose to ignore it. +-mutex:aio_context_acquire + mutex:pthread_mutex_lock + + # TSan reports a race between pthread_mutex_init() and +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index d9754dfebc..17830a69c1 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -179,13 +179,7 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) + + static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs) + { +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_acquire(bdrv_get_aio_context(bs)); +- } + do_drain_begin(drain_type, bs); +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_release(bdrv_get_aio_context(bs)); +- } + } + + static BlockBackend * no_coroutine_fn test_setup(void) +@@ -209,13 +203,7 @@ static BlockBackend * no_coroutine_fn test_setup(void) + + static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs) + { +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_acquire(bdrv_get_aio_context(bs)); +- } + do_drain_end(drain_type, bs); +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_release(bdrv_get_aio_context(bs)); +- } + } + + /* +@@ -520,12 +508,8 @@ static void test_iothread_main_thread_bh(void *opaque) + { + struct test_iothread_data *data = opaque; + +- /* Test that the AioContext is not yet locked in a random BH that is +- * executed during drain, otherwise this would deadlock. 
*/ +- aio_context_acquire(bdrv_get_aio_context(data->bs)); + bdrv_flush(data->bs); + bdrv_dec_in_flight(data->bs); /* incremented by test_iothread_common() */ +- aio_context_release(bdrv_get_aio_context(data->bs)); + } + + /* +@@ -567,7 +551,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + blk_set_disable_request_queuing(blk, true); + + blk_set_aio_context(blk, ctx_a, &error_abort); +- aio_context_acquire(ctx_a); + + s->bh_indirection_ctx = ctx_b; + +@@ -582,8 +565,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + g_assert(acb != NULL); + g_assert_cmpint(aio_ret, ==, -EINPROGRESS); + +- aio_context_release(ctx_a); +- + data = (struct test_iothread_data) { + .bs = bs, + .drain_type = drain_type, +@@ -592,10 +573,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + + switch (drain_thread) { + case 0: +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_acquire(ctx_a); +- } +- + /* + * Increment in_flight so that do_drain_begin() waits for + * test_iothread_main_thread_bh(). 
This prevents the race between +@@ -613,20 +590,10 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + do_drain_begin(drain_type, bs); + g_assert_cmpint(bs->in_flight, ==, 0); + +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_release(ctx_a); +- } + qemu_event_wait(&done_event); +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_acquire(ctx_a); +- } + + g_assert_cmpint(aio_ret, ==, 0); + do_drain_end(drain_type, bs); +- +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_release(ctx_a); +- } + break; + case 1: + co = qemu_coroutine_create(test_iothread_drain_co_entry, &data); +@@ -637,9 +604,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + g_assert_not_reached(); + } + +- aio_context_acquire(ctx_a); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx_a); + + bdrv_unref(bs); + blk_unref(blk); +@@ -757,7 +722,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + BlockJob *job; + TestBlockJob *tjob; + IOThread *iothread = NULL; +- AioContext *ctx; + int ret; + + src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, +@@ -787,11 +751,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + } + + if (use_iothread) { ++ AioContext *ctx; ++ + iothread = iothread_new(); + ctx = iothread_get_aio_context(iothread); + blk_set_aio_context(blk_src, ctx, &error_abort); +- } else { +- ctx = qemu_get_aio_context(); + } + + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, +@@ -800,7 +764,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + blk_insert_bs(blk_target, target, &error_abort); + blk_set_allow_aio_context_change(blk_target, true); + +- aio_context_acquire(ctx); + tjob = block_job_create("job0", &test_job_driver, NULL, src, + 0, BLK_PERM_ALL, + 0, 0, NULL, NULL, &error_abort); +@@ -821,7 +784,6 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type, + tjob->prepare_ret = -EIO; + break; + } +- aio_context_release(ctx); + + job_start(&job->job); + +@@ -912,12 +874,10 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + } + g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO)); + +- aio_context_acquire(ctx); + if (use_iothread) { + blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); + assert(blk_get_aio_context(blk_target) == qemu_get_aio_context()); + } +- aio_context_release(ctx); + + blk_unref(blk_src); + blk_unref(blk_target); +@@ -1401,9 +1361,7 @@ static void test_append_to_drained(void) + g_assert_cmpint(base_s->drain_count, ==, 1); + g_assert_cmpint(base->in_flight, ==, 0); + +- aio_context_acquire(qemu_get_aio_context()); + bdrv_append(overlay, base, &error_abort); +- aio_context_release(qemu_get_aio_context()); + + g_assert_cmpint(base->in_flight, ==, 0); + g_assert_cmpint(overlay->in_flight, ==, 0); +@@ -1438,16 +1396,11 @@ static void test_set_aio_context(void) + + bdrv_drained_begin(bs); + bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort); +- +- aio_context_acquire(ctx_a); + bdrv_drained_end(bs); + + bdrv_drained_begin(bs); + bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort); +- aio_context_release(ctx_a); +- aio_context_acquire(ctx_b); + bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort); +- aio_context_release(ctx_b); + bdrv_drained_end(bs); + + bdrv_unref(bs); +diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c +index 8ee6ef38d8..cafc023db4 100644 +--- a/tests/unit/test-bdrv-graph-mod.c ++++ b/tests/unit/test-bdrv-graph-mod.c +@@ -142,10 +142,8 @@ static void test_update_perm_tree(void) + BDRV_CHILD_DATA, &error_abort); + bdrv_graph_wrunlock(); + +- aio_context_acquire(qemu_get_aio_context()); + ret = bdrv_append(filter, bs, NULL); + g_assert_cmpint(ret, <, 0); +- aio_context_release(qemu_get_aio_context()); + + bdrv_unref(filter); + blk_unref(root); +@@ -211,9 
+209,7 @@ static void test_should_update_child(void) + bdrv_attach_child(filter, target, "target", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); + bdrv_graph_wrunlock(); +- aio_context_acquire(qemu_get_aio_context()); + bdrv_append(filter, bs, &error_abort); +- aio_context_release(qemu_get_aio_context()); + + bdrv_graph_rdlock_main_loop(); + g_assert(target->backing->bs == bs); +@@ -440,9 +436,7 @@ static void test_append_greedy_filter(void) + &error_abort); + bdrv_graph_wrunlock(); + +- aio_context_acquire(qemu_get_aio_context()); + bdrv_append(fl, base, &error_abort); +- aio_context_release(qemu_get_aio_context()); + bdrv_unref(fl); + bdrv_unref(top); + } +diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c +index 9b15d2768c..3766d5de6b 100644 +--- a/tests/unit/test-block-iothread.c ++++ b/tests/unit/test-block-iothread.c +@@ -483,7 +483,6 @@ static void test_sync_op(const void *opaque) + bdrv_graph_rdunlock_main_loop(); + + blk_set_aio_context(blk, ctx, &error_abort); +- aio_context_acquire(ctx); + if (t->fn) { + t->fn(c); + } +@@ -491,7 +490,6 @@ static void test_sync_op(const void *opaque) + t->blkfn(blk); + } + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx); + + bdrv_unref(bs); + blk_unref(blk); +@@ -576,9 +574,7 @@ static void test_attach_blockjob(void) + aio_poll(qemu_get_aio_context(), false); + } + +- aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx); + + tjob->n = 0; + while (tjob->n == 0) { +@@ -595,9 +591,7 @@ static void test_attach_blockjob(void) + WITH_JOB_LOCK_GUARD() { + job_complete_sync_locked(&tjob->common.job, &error_abort); + } +- aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx); + + bdrv_unref(bs); + blk_unref(blk); +@@ -654,9 +648,7 @@ static void test_propagate_basic(void) + + /* Switch the AioContext back */ + main_ctx = 
qemu_get_aio_context(); +- aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); +- aio_context_release(ctx); + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(bs_a) == main_ctx); + g_assert(bdrv_get_aio_context(bs_verify) == main_ctx); +@@ -732,9 +724,7 @@ static void test_propagate_diamond(void) + + /* Switch the AioContext back */ + main_ctx = qemu_get_aio_context(); +- aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); +- aio_context_release(ctx); + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(bs_verify) == main_ctx); + g_assert(bdrv_get_aio_context(bs_a) == main_ctx); +@@ -764,13 +754,11 @@ static void test_propagate_mirror(void) + &error_abort); + + /* Start a mirror job */ +- aio_context_acquire(main_ctx); + mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, + MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, + BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, + false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, + &error_abort); +- aio_context_release(main_ctx); + + WITH_JOB_LOCK_GUARD() { + job = job_get_locked("job0"); +@@ -785,9 +773,7 @@ static void test_propagate_mirror(void) + g_assert(job->aio_context == ctx); + + /* Change the AioContext of target */ +- aio_context_acquire(ctx); + bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); +- aio_context_release(ctx); + g_assert(bdrv_get_aio_context(src) == main_ctx); + g_assert(bdrv_get_aio_context(target) == main_ctx); + g_assert(bdrv_get_aio_context(filter) == main_ctx); +@@ -805,10 +791,8 @@ static void test_propagate_mirror(void) + g_assert(bdrv_get_aio_context(filter) == main_ctx); + + /* ...unless we explicitly allow it */ +- aio_context_acquire(ctx); + blk_set_allow_aio_context_change(blk, true); + bdrv_try_change_aio_context(target, ctx, NULL, &error_abort); +- aio_context_release(ctx); + + g_assert(blk_get_aio_context(blk) == ctx); + 
g_assert(bdrv_get_aio_context(src) == ctx); +@@ -817,10 +801,8 @@ static void test_propagate_mirror(void) + + job_cancel_sync_all(); + +- aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); + bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); +- aio_context_release(ctx); + + blk_unref(blk); + bdrv_unref(src); +@@ -836,7 +818,6 @@ static void test_attach_second_node(void) + BlockDriverState *bs, *filter; + QDict *options; + +- aio_context_acquire(main_ctx); + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs, &error_abort); +@@ -846,15 +827,12 @@ static void test_attach_second_node(void) + qdict_put_str(options, "file", "base"); + + filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); +- aio_context_release(main_ctx); + + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); + g_assert(bdrv_get_aio_context(filter) == ctx); + +- aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); +- aio_context_release(ctx); + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(bs) == main_ctx); + g_assert(bdrv_get_aio_context(filter) == main_ctx); +@@ -868,11 +846,9 @@ static void test_attach_preserve_blk_ctx(void) + { + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); +- AioContext *main_ctx = qemu_get_aio_context(); + BlockBackend *blk; + BlockDriverState *bs; + +- aio_context_acquire(main_ctx); + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + bs->total_sectors = 65536 / BDRV_SECTOR_SIZE; +@@ -881,25 +857,18 @@ static void test_attach_preserve_blk_ctx(void) + blk_insert_bs(blk, bs, &error_abort); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); +- aio_context_release(main_ctx); + + 
/* Remove the node again */ +- aio_context_acquire(ctx); + blk_remove_bs(blk); +- aio_context_release(ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == qemu_get_aio_context()); + + /* Re-attach the node */ +- aio_context_acquire(main_ctx); + blk_insert_bs(blk, bs, &error_abort); +- aio_context_release(main_ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); + +- aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx); + bdrv_unref(bs); + blk_unref(blk); + } +diff --git a/tests/unit/test-blockjob.c b/tests/unit/test-blockjob.c +index a130f6fefb..fe3e0d2d38 100644 +--- a/tests/unit/test-blockjob.c ++++ b/tests/unit/test-blockjob.c +@@ -228,7 +228,6 @@ static void cancel_common(CancelJob *s) + BlockJob *job = &s->common; + BlockBackend *blk = s->blk; + JobStatus sts = job->job.status; +- AioContext *ctx = job->job.aio_context; + + job_cancel_sync(&job->job, true); + WITH_JOB_LOCK_GUARD() { +@@ -240,9 +239,7 @@ static void cancel_common(CancelJob *s) + job_unref_locked(&job->job); + } + +- aio_context_acquire(ctx); + destroy_blk(blk); +- aio_context_release(ctx); + + } + +@@ -391,132 +388,6 @@ static void test_cancel_concluded(void) + cancel_common(s); + } + +-/* (See test_yielding_driver for the job description) */ +-typedef struct YieldingJob { +- BlockJob common; +- bool should_complete; +-} YieldingJob; +- +-static void yielding_job_complete(Job *job, Error **errp) +-{ +- YieldingJob *s = container_of(job, YieldingJob, common.job); +- s->should_complete = true; +- job_enter(job); +-} +- +-static int coroutine_fn yielding_job_run(Job *job, Error **errp) +-{ +- YieldingJob *s = container_of(job, YieldingJob, common.job); +- +- job_transition_to_ready(job); +- +- while (!s->should_complete) { +- job_yield(job); +- } +- +- return 0; +-} +- +-/* +- * This job transitions immediately to the READY state, and then +- * yields 
until it is to complete. +- */ +-static const BlockJobDriver test_yielding_driver = { +- .job_driver = { +- .instance_size = sizeof(YieldingJob), +- .free = block_job_free, +- .user_resume = block_job_user_resume, +- .run = yielding_job_run, +- .complete = yielding_job_complete, +- }, +-}; +- +-/* +- * Test that job_complete_locked() works even on jobs that are in a paused +- * state (i.e., STANDBY). +- * +- * To do this, run YieldingJob in an IO thread, get it into the READY +- * state, then have a drained section. Before ending the section, +- * acquire the context so the job will not be entered and will thus +- * remain on STANDBY. +- * +- * job_complete_locked() should still work without error. +- * +- * Note that on the QMP interface, it is impossible to lock an IO +- * thread before a drained section ends. In practice, the +- * bdrv_drain_all_end() and the aio_context_acquire() will be +- * reversed. However, that makes for worse reproducibility here: +- * Sometimes, the job would no longer be in STANDBY then but already +- * be started. We cannot prevent that, because the IO thread runs +- * concurrently. We can only prevent it by taking the lock before +- * ending the drained section, so we do that. +- * +- * (You can reverse the order of operations and most of the time the +- * test will pass, but sometimes the assert(status == STANDBY) will +- * fail.) 
+- */ +-static void test_complete_in_standby(void) +-{ +- BlockBackend *blk; +- IOThread *iothread; +- AioContext *ctx; +- Job *job; +- BlockJob *bjob; +- +- /* Create a test drive, move it to an IO thread */ +- blk = create_blk(NULL); +- iothread = iothread_new(); +- +- ctx = iothread_get_aio_context(iothread); +- blk_set_aio_context(blk, ctx, &error_abort); +- +- /* Create our test job */ +- bjob = mk_job(blk, "job", &test_yielding_driver, true, +- JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); +- job = &bjob->job; +- assert_job_status_is(job, JOB_STATUS_CREATED); +- +- /* Wait for the job to become READY */ +- job_start(job); +- /* +- * Here we are waiting for the status to change, so don't bother +- * protecting the read every time. +- */ +- AIO_WAIT_WHILE_UNLOCKED(ctx, job->status != JOB_STATUS_READY); +- +- /* Begin the drained section, pausing the job */ +- bdrv_drain_all_begin(); +- assert_job_status_is(job, JOB_STATUS_STANDBY); +- +- /* Lock the IO thread to prevent the job from being run */ +- aio_context_acquire(ctx); +- /* This will schedule the job to resume it */ +- bdrv_drain_all_end(); +- aio_context_release(ctx); +- +- WITH_JOB_LOCK_GUARD() { +- /* But the job cannot run, so it will remain on standby */ +- assert(job->status == JOB_STATUS_STANDBY); +- +- /* Even though the job is on standby, this should work */ +- job_complete_locked(job, &error_abort); +- +- /* The test is done now, clean up. 
*/ +- job_finish_sync_locked(job, NULL, &error_abort); +- assert(job->status == JOB_STATUS_PENDING); +- +- job_finalize_locked(job, &error_abort); +- assert(job->status == JOB_STATUS_CONCLUDED); +- +- job_dismiss_locked(&job, &error_abort); +- } +- +- aio_context_acquire(ctx); +- destroy_blk(blk); +- aio_context_release(ctx); +- iothread_join(iothread); +-} +- + int main(int argc, char **argv) + { + qemu_init_main_loop(&error_abort); +@@ -531,13 +402,5 @@ int main(int argc, char **argv) + g_test_add_func("/blockjob/cancel/standby", test_cancel_standby); + g_test_add_func("/blockjob/cancel/pending", test_cancel_pending); + g_test_add_func("/blockjob/cancel/concluded", test_cancel_concluded); +- +- /* +- * This test is flaky and sometimes fails in CI and otherwise: +- * don't run unless user opts in via environment variable. +- */ +- if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- g_test_add_func("/blockjob/complete_in_standby", test_complete_in_standby); +- } + return g_test_run(); + } +diff --git a/tests/unit/test-replication.c b/tests/unit/test-replication.c +index afff908d77..5d2003b8ce 100644 +--- a/tests/unit/test-replication.c ++++ b/tests/unit/test-replication.c +@@ -199,17 +199,13 @@ static BlockBackend *start_primary(void) + static void teardown_primary(void) + { + BlockBackend *blk; +- AioContext *ctx; + + /* remove P_ID */ + blk = blk_by_name(P_ID); + assert(blk); + +- ctx = blk_get_aio_context(blk); +- aio_context_acquire(ctx); + monitor_remove_blk(blk); + blk_unref(blk); +- aio_context_release(ctx); + } + + static void test_primary_read(void) +@@ -345,27 +341,20 @@ static void teardown_secondary(void) + { + /* only need to destroy two BBs */ + BlockBackend *blk; +- AioContext *ctx; + + /* remove S_LOCAL_DISK_ID */ + blk = blk_by_name(S_LOCAL_DISK_ID); + assert(blk); + +- ctx = blk_get_aio_context(blk); +- aio_context_acquire(ctx); + monitor_remove_blk(blk); + blk_unref(blk); +- aio_context_release(ctx); + + /* remove S_ID */ + blk = blk_by_name(S_ID); + 
assert(blk); + +- ctx = blk_get_aio_context(blk); +- aio_context_acquire(ctx); + monitor_remove_blk(blk); + blk_unref(blk); +- aio_context_release(ctx); + } + + static void test_secondary_read(void) +diff --git a/util/async.c b/util/async.c +index 04ee83d220..dfd44ef612 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -562,12 +562,10 @@ static void co_schedule_bh_cb(void *opaque) + Coroutine *co = QSLIST_FIRST(&straight); + QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); + trace_aio_co_schedule_bh_cb(ctx, co); +- aio_context_acquire(ctx); + + /* Protected by write barrier in qemu_aio_coroutine_enter */ + qatomic_set(&co->scheduled, NULL); + qemu_aio_coroutine_enter(ctx, co); +- aio_context_release(ctx); + } + } + +@@ -707,9 +705,7 @@ void aio_co_enter(AioContext *ctx, Coroutine *co) + assert(self != co); + QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next); + } else { +- aio_context_acquire(ctx); + qemu_aio_coroutine_enter(ctx, co); +- aio_context_release(ctx); + } + } + +diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c +index a9a48fffb8..3bfb1ad3ec 100644 +--- a/util/vhost-user-server.c ++++ b/util/vhost-user-server.c +@@ -360,10 +360,7 @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc, + + qio_channel_set_follow_coroutine_ctx(server->ioc, true); + +- /* Attaching the AioContext starts the vu_client_trip coroutine */ +- aio_context_acquire(server->ctx); + vhost_user_server_attach_aio_context(server, server->ctx); +- aio_context_release(server->ctx); + } + + /* server->ctx acquired by caller */ +-- +2.39.3 + diff --git a/SOURCES/kvm-block-remove-bdrv_co_lock.patch b/SOURCES/kvm-block-remove-bdrv_co_lock.patch new file mode 100644 index 0000000..b219c1c --- /dev/null +++ b/SOURCES/kvm-block-remove-bdrv_co_lock.patch @@ -0,0 +1,97 @@ +From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:04 -0500 +Subject: [PATCH 087/101] block: remove 
bdrv_co_lock() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm) + +The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops. +Remove them. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231205182011.1976568-8-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + block.c | 10 ---------- + blockdev.c | 5 ----- + include/block/block-global-state.h | 14 -------------- + 3 files changed, 29 deletions(-) + +diff --git a/block.c b/block.c +index 91ace5d2d5..434b7f4d72 100644 +--- a/block.c ++++ b/block.c +@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) + bdrv_dec_in_flight(bs); + } + +-void coroutine_fn bdrv_co_lock(BlockDriverState *bs) +-{ +- /* TODO removed in next patch */ +-} +- +-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) +-{ +- /* TODO removed in next patch */ +-} +- + static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) + { + GLOBAL_STATE_CODE(); +diff --git a/blockdev.c b/blockdev.c +index 5d8b3a23eb..3a5e7222ec 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, + return; + } + +- bdrv_co_lock(bs); + bdrv_drained_begin(bs); +- bdrv_co_unlock(bs); + + old_ctx = bdrv_co_enter(bs); + blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); + bdrv_co_leave(bs, old_ctx); + +- bdrv_co_lock(bs); + bdrv_drained_end(bs); +- bdrv_co_unlock(bs); +- + blk_co_unref(blk); + } + +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index 0327f1c605..4ec0b217f0 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char 
*tag); + int bdrv_debug_resume(BlockDriverState *bs, const char *tag); + bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + +-/** +- * Locks the AioContext of @bs if it's not the current AioContext. This avoids +- * double locking which could lead to deadlocks: This is a coroutine_fn, so we +- * know we already own the lock of the current AioContext. +- * +- * May only be called in the main thread. +- */ +-void coroutine_fn bdrv_co_lock(BlockDriverState *bs); +- +-/** +- * Unlocks the AioContext of @bs if it's not the current AioContext. +- */ +-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); +- + bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch b/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch new file mode 100644 index 0000000..d6670c1 --- /dev/null +++ b/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch @@ -0,0 +1,411 @@ +From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:11 -0500 +Subject: [PATCH 094/101] block: remove outdated AioContext locking comments + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm) + +The AioContext lock no longer exists. + +There is one noteworthy change: + + - * More specifically, these functions use BDRV_POLL_WHILE(bs), which + - * requires the caller to be either in the main thread and hold + - * the BlockdriverState (bs) AioContext lock, or directly in the + - * home thread that runs the bs AioContext. Calling them from + - * another thread in another AioContext would cause deadlocks. 
+ + * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires + + * the caller to be either in the main thread or directly in the home thread + + * that runs the bs AioContext. Calling them from another thread in another + + * AioContext would cause deadlocks. + +I am not sure whether deadlocks are still possible. Maybe they have just +moved to the fine-grained locks that have replaced the AioContext. Since +I am not sure if the deadlocks are gone, I have kept the substance +unchanged and just removed mention of the AioContext. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-15-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + block.c | 73 ++++++---------------------- + block/block-backend.c | 8 --- + block/export/vhost-user-blk-server.c | 4 -- + include/block/block-common.h | 3 -- + include/block/block-io.h | 9 ++-- + include/block/block_int-common.h | 2 - + tests/qemu-iotests/202 | 2 +- + tests/qemu-iotests/203 | 3 +- + 8 files changed, 22 insertions(+), 82 deletions(-) + +diff --git a/block.c b/block.c +index 434b7f4d72..a097772238 100644 +--- a/block.c ++++ b/block.c +@@ -1616,11 +1616,6 @@ out: + g_free(gen_node_name); + } + +-/* +- * The caller must always hold @bs AioContext lock, because this function calls +- * bdrv_refresh_total_sectors() which polls when called from non-coroutine +- * context. +- */ + static int no_coroutine_fn GRAPH_UNLOCKED + bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + QDict *options, int open_flags, Error **errp) +@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) + * Replaces the node that a BdrvChild points to without updating permissions. + * + * If @new_bs is non-NULL, the parent of @child must already be drained through +- * @child and the caller must hold the AioContext lock for @new_bs. ++ * @child. 
+ */ + static void GRAPH_WRLOCK + bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) +@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { + * + * Returns new created child. + * +- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and +- * @child_bs can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * Both @parent_bs and @child_bs can move to a different AioContext in this ++ * function. + */ + static BdrvChild * GRAPH_WRLOCK + bdrv_attach_child_common(BlockDriverState *child_bs, +@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + /* + * Function doesn't update permissions, caller is responsible for this. + * +- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and +- * @child_bs can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * Both @parent_bs and @child_bs can move to a different AioContext in this ++ * function. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. +@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. +- * +- * The caller must hold the AioContext lock @child_bs, but not that of @ctx +- * (unless @child_bs is already in @ctx). + */ + BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + const char *child_name, +@@ -3226,9 +3216,6 @@ out: + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. +- * +- * If @parent_bs and @child_bs are in different AioContexts, the caller must +- * hold the AioContext lock for @child_bs, but not for @parent_bs. 
+ */ + BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, +@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) + * + * Function doesn't update permissions, caller is responsible for this. + * +- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and +- * @child_bs can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * Both @parent_bs and @child_bs can move to a different AioContext in this ++ * function. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. +@@ -3513,9 +3499,8 @@ out: + } + + /* +- * The caller must hold the AioContext lock for @backing_hd. Both @bs and +- * @backing_hd can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * Both @bs and @backing_hd can move to a different AioContext in this ++ * function. + * + * If a backing child is already present (i.e. we're detaching a node), that + * child node must be drained. +@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + * itself, all options starting with "${bdref_key}." are considered part of the + * BlockdevRef. + * +- * The caller must hold the main AioContext lock. +- * + * TODO Can this be unified with bdrv_open_image()? + */ + int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, +@@ -3745,9 +3728,7 @@ done: + * + * The BlockdevRef will be removed from the options QDict. + * +- * The caller must hold the lock of the main AioContext and no other AioContext. +- * @parent can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * @parent can move to a different AioContext in this function. 
+ */ + BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, +@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename, + /* + * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. + * +- * The caller must hold the lock of the main AioContext and no other AioContext. +- * @parent can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * @parent can move to a different AioContext in this function. + */ + int bdrv_open_file_child(const char *filename, + QDict *options, const char *bdref_key, +@@ -3923,8 +3902,6 @@ out: + * The reference parameter may be used to specify an existing block device which + * should be opened. If specified, neither options nor a filename may be given, + * nor can an existing BDS be reused (that is, *pbs has to be NULL). +- * +- * The caller must always hold the main AioContext lock. + */ + static BlockDriverState * no_coroutine_fn + bdrv_open_inherit(const char *filename, const char *reference, QDict *options, +@@ -4217,7 +4194,6 @@ close_and_fail: + return NULL; + } + +-/* The caller must always hold the main AioContext lock. */ + BlockDriverState *bdrv_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp) + { +@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + * + * Return 0 on success, otherwise return < 0 and set @errp. + * +- * The caller must hold the AioContext lock of @reopen_state->bs. + * @reopen_state->bs can move to a different AioContext in this function. +- * Callers must make sure that their AioContext locking is still correct after +- * this. 
+ */ + static int GRAPH_UNLOCKED + bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, +@@ -4801,8 +4774,6 @@ out_rdlock: + * It is the responsibility of the caller to then call the abort() or + * commit() for any other BDS that have been left in a prepare() state + * +- * The caller must hold the AioContext lock of @reopen_state->bs. +- * + * After calling this function, the transaction @change_child_tran may only be + * completed while holding a writer lock for the graph. + */ +@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) + * child. + * + * This function does not create any image files. +- * +- * The caller must hold the AioContext lock for @bs_top. + */ + int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) +@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs) + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + * +- * The caller holds the AioContext lock for @bs. It must make sure that @bs +- * stays in the same AioContext, i.e. @options must not refer to nodes in a +- * different AioContext. ++ * The caller must make sure that @bs stays in the same AioContext, i.e. ++ * @options must not refer to nodes in a different AioContext. + */ + BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = { + * + * Must be called from the main AioContext. + * +- * The caller must own the AioContext lock for the old AioContext of bs, but it +- * must not own the AioContext lock for new_context (unless new_context is the +- * same as the current context of bs). +- * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. 
+ */ +@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, + * + * If ignore_child is not NULL, that child (and its subgraph) will not + * be touched. +- * +- * This function still requires the caller to take the bs current +- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE +- * assumes the lock is always held if bs is in another AioContext. +- * For the same reason, it temporarily also holds the new AioContext, since +- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too. +- * Therefore the new AioContext lock must not be taken by the caller. + */ + int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + + /* + * Linear phase: go through all callbacks collected in the transaction. +- * Run all callbacks collected in the recursion to switch all nodes +- * AioContext lock (transaction commit), or undo all changes done in the ++ * Run all callbacks collected in the recursion to switch every node's ++ * AioContext (transaction commit), or undo all changes done in the + * recursion (transaction abort). + */ + +diff --git a/block/block-backend.c b/block/block-backend.c +index f412bed274..209eb07528 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) + * Both sets of permissions can be changed later using blk_set_perm(). + * + * Return the new BlockBackend on success, null on failure. +- * +- * Callers must hold the AioContext lock of @bs. 
+ */ + BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, Error **errp) +@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, + * Just as with bdrv_open(), after having called this function the reference to + * @options belongs to the block layer (even on failure). + * +- * Called without holding an AioContext lock. +- * + * TODO: Remove @filename and @flags; it should be possible to specify a whole + * BDS tree just by specifying the @options QDict (or @reference, + * alternatively). At the time of adding this function, this is not possible, +@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) + + /* + * Disassociates the currently associated BlockDriverState from @blk. +- * +- * The caller must hold the AioContext lock for the BlockBackend. + */ + void blk_remove_bs(BlockBackend *blk) + { +@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk) + + /* + * Associates a new BlockDriverState with @blk. +- * +- * Callers must hold the AioContext lock of @bs. 
+ */ + int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) + { +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index 16f48388d3..50c358e8cd 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque) + vu_config_change_msg(&vexp->vu_server.vu_dev); + } + +-/* Called with vexp->export.ctx acquired */ + static void vu_blk_drained_begin(void *opaque) + { + VuBlkExport *vexp = opaque; +@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque) + vhost_user_server_detach_aio_context(&vexp->vu_server); + } + +-/* Called with vexp->export.blk AioContext acquired */ + static void vu_blk_drained_end(void *opaque) + { + VuBlkExport *vexp = opaque; +@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque) + * Ensures that bdrv_drained_begin() waits until in-flight requests complete + * and the server->co_trip coroutine has terminated. It will be restarted in + * vhost_user_server_attach_aio_context(). +- * +- * Called with vexp->export.ctx acquired. + */ + static bool vu_blk_drained_poll(void *opaque) + { +diff --git a/include/block/block-common.h b/include/block/block-common.h +index d7599564db..a846023a09 100644 +--- a/include/block/block-common.h ++++ b/include/block/block-common.h +@@ -70,9 +70,6 @@ + * automatically takes the graph rdlock when calling the wrapped function. In + * the same way, no_co_wrapper_bdrv_wrlock functions automatically take the + * graph wrlock. +- * +- * If the first parameter of the function is a BlockDriverState, BdrvChild or +- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper. 
+ */ + #define no_co_wrapper + #define no_co_wrapper_bdrv_rdlock +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 8eb39a858b..b49e0537dd 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * +- * More specifically, these functions use BDRV_POLL_WHILE(bs), which +- * requires the caller to be either in the main thread and hold +- * the BlockdriverState (bs) AioContext lock, or directly in the +- * home thread that runs the bs AioContext. Calling them from +- * another thread in another AioContext would cause deadlocks. ++ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires ++ * the caller to be either in the main thread or directly in the home thread ++ * that runs the bs AioContext. Calling them from another thread in another ++ * AioContext would cause deadlocks. + * + * Therefore, these functions are not proper I/O, because they + * can't run in *any* iothreads, but only in a specific one. +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 4e31d161c5..151279d481 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -1192,8 +1192,6 @@ struct BlockDriverState { + /* The error object in use for blocking operations on backing_hd */ + Error *backing_blocker; + +- /* Protected by AioContext lock */ +- + /* + * If we are reading a disk image, give its size in sectors. + * Generally read-only; it is written to by load_snapshot and +diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202 +index b784dcd791..13304242e5 100755 +--- a/tests/qemu-iotests/202 ++++ b/tests/qemu-iotests/202 +@@ -21,7 +21,7 @@ + # Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a + # single IOThread completes successfully. 
This particular command triggered a + # hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect +-# against regressions. ++# against regressions even though the AioContext lock no longer exists. + + import iotests + +diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203 +index ab80fd0e44..1ba878522b 100755 +--- a/tests/qemu-iotests/203 ++++ b/tests/qemu-iotests/203 +@@ -21,7 +21,8 @@ + # Check that QMP 'migrate' with multiple drives on a single IOThread completes + # successfully. This particular command triggered a hang in the source QEMU + # process due to recursive AioContext locking in bdrv_invalidate_all() and +-# BDRV_POLL_WHILE(). ++# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext ++# lock no longer exists. + + import iotests + +-- +2.39.3 + diff --git a/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch b/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch new file mode 100644 index 0000000..34c4e8f --- /dev/null +++ b/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch @@ -0,0 +1,105 @@ +From 95b2ffc5f01dc4309c2e747ed883d22cd1d26347 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Sat, 2 Mar 2024 17:00:23 +0100 +Subject: [PATCH 2/2] chardev/char-socket: Fix TLS io channels sending too much + data to the backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 227: Fix TLS io channels sending too much data to the backend +RH-Jira: RHEL-24614 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Daniel P. 
Berrangé +RH-Commit: [1/1] fce871914e0ce52e16a6edae0e007513f9fec1ae (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-24614 + +commit 462945cd22d2bcd233401ed3aa167d83a8e35b05 +Author: Thomas Huth +Date: Thu Feb 29 11:43:37 2024 +0100 + + chardev/char-socket: Fix TLS io channels sending too much data to the backend + + Commit ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") + changed the behavior of the TLS io channels to schedule a second reading + attempt if there is still incoming data pending. This caused a regression + with backends like the sclpconsole that check in their read function that + the sender does not try to write more bytes to it than the device can + currently handle. + + The problem can be reproduced like this: + + 1) In one terminal, do this: + + mkdir qemu-pki + cd qemu-pki + openssl genrsa 2048 > ca-key.pem + openssl req -new -x509 -nodes -days 365000 -key ca-key.pem -out ca-cert.pem + # enter some dummy value for the cert + openssl genrsa 2048 > server-key.pem + openssl req -new -x509 -nodes -days 365000 -key server-key.pem \ + -out server-cert.pem + # enter some other dummy values for the cert + + gnutls-serv --echo --x509cafile ca-cert.pem --x509keyfile server-key.pem \ + --x509certfile server-cert.pem -p 8338 + + 2) In another terminal, do this: + + wget https://download.fedoraproject.org/pub/fedora-secondary/releases/39/Cloud/s390x/images/Fedora-Cloud-Base-39-1.5.s390x.qcow2 + + qemu-system-s390x -nographic -nodefaults \ + -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 \ + -object tls-creds-x509,id=tls0,endpoint=client,verify-peer=false,dir=$PWD/qemu-pki \ + -chardev socket,id=tls_chardev,host=localhost,port=8338,tls-creds=tls0 \ + -device sclpconsole,chardev=tls_chardev,id=tls_serial + + QEMU then aborts after a second or two with: + + qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion + `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. 
+ Aborted (core dumped) + + It looks like the second read does not trigger the chr_can_read() function + to be called before the second read, which should normally always be done + before sending bytes to a character device to see how much it can handle, + so the s->max_size in tcp_chr_read() still contains the old value from the + previous read. Let's make sure that we use the up-to-date value by calling + tcp_chr_read_poll() again here. + + Fixes: ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") + Buglink: https://issues.redhat.com/browse/RHEL-24614 + Reviewed-by: "Daniel P. Berrangé" + Message-ID: <20240229104339.42574-1-thuth@redhat.com> + Reviewed-by: Antoine Damhet + Tested-by: Antoine Damhet + Reviewed-by: Marc-André Lureau + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + chardev/char-socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 73947da188..034840593d 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) + s->max_size <= 0) { + return TRUE; + } +- len = sizeof(buf); +- if (len > s->max_size) { +- len = s->max_size; ++ len = tcp_chr_read_poll(opaque); ++ if (len > sizeof(buf)) { ++ len = sizeof(buf); + } + size = tcp_chr_recv(chr, (void *)buf, len); + if (size == 0 || (size == -1 && errno != EAGAIN)) { +-- +2.39.3 + diff --git a/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch b/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch new file mode 100644 index 0000000..30e055f --- /dev/null +++ b/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch @@ -0,0 +1,78 @@ +From 4d4102f6e2f9afd6182888787ae8b570347df87d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 18:06:59 +0000 +Subject: [PATCH 1/3] chardev: lower priority 
of the HUP GSource in socket + chardev +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [1/3] 842f54349191b0206e68f35a7a80155f5a584942 (berrange/centos-src-qemu) + +The socket chardev often has 2 GSource object registered against the +same FD. One is registered all the time and is just intended to handle +POLLHUP events, while the other gets registered & unregistered on the +fly as the frontend is ready to receive more data or not. + +It is very common for poll() to signal a POLLHUP event at the same time +as there is pending incoming data from the disconnected client. It is +therefore essential to process incoming data prior to processing HUP. +The problem with having 2 GSource on the same FD is that there is no +guaranteed ordering of execution between them, so the chardev code may +process HUP first and thus discard data. + +This failure scenario is non-deterministic but can be seen fairly +reliably by reverting a7077b8e354d90fec26c2921aa2dea85b90dff90, and +then running 'tests/unit/test-char', which will sometimes fail with +missing data. + +Ideally QEMU would only have 1 GSource, but that's a complex code +refactoring job. The next best solution is to try to ensure ordering +between the 2 GSource objects. This can be achieved by lowering the +priority of the HUP GSource, so that it is never dispatched if the +main GSource is also ready to dispatch. Counter-intuitively, lowering +the priority of a GSource is done by raising its priority number. + +Reviewed-by: Marc-André Lureau +Reviewed-by: Thomas Huth +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit 8bd8b04adc9f18904f323dff085f8b4ec77915c6) +--- + chardev/char-socket.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 034840593d..f48d341ebc 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -597,6 +597,22 @@ static void update_ioc_handlers(SocketChardev *s) + + remove_hup_source(s); + s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP); ++ /* ++ * poll() is liable to return POLLHUP even when there is ++ * still incoming data available to read on the FD. If ++ * we have the hup_source at the same priority as the ++ * main io_add_watch_poll GSource, then we might end up ++ * processing the POLLHUP event first, closing the FD, ++ * and as a result silently discard data we should have ++ * read. ++ * ++ * By setting the hup_source to G_PRIORITY_DEFAULT + 1, ++ * we ensure that io_add_watch_poll GSource will always ++ * be dispatched first, thus guaranteeing we will be ++ * able to process all incoming data before closing the ++ * FD ++ */ ++ g_source_set_priority(s->hup_source, G_PRIORITY_DEFAULT + 1); + g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup, + chr, NULL); + g_source_attach(s->hup_source, chr->gcontext); +-- +2.39.3 + diff --git a/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch b/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch deleted file mode 100644 index 4173648..0000000 --- a/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae 
(jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit ef56ffbdd6b0605dc1e305611287b948c970e236 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:08 2023 -0400 - - checkpatch: add qemu_bh_new/aio_bh_new checks - - Advise authors to use the _guarded versions of the APIs, instead. - - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-4-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - scripts/checkpatch.pl | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl -index d768171dcf..eeaec436eb 100755 ---- a/scripts/checkpatch.pl -+++ b/scripts/checkpatch.pl -@@ -2865,6 +2865,14 @@ sub process { - if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { - ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr); - } -+# recommend qemu_bh_new_guarded instead of qemu_bh_new -+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { -+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); -+ } -+# recommend aio_bh_new_guarded instead of aio_bh_new -+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { -+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); -+ } - # check for module_init(), use category-specific init macros explicitly please - if ($line =~ /^module_init\s*\(/) { - ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . 
$herecurr); --- -2.39.3 - diff --git a/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch b/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch new file mode 100644 index 0000000..6fffc23 --- /dev/null +++ b/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch @@ -0,0 +1,412 @@ +From e99c56752a1c4021a93c92b7be78856ebefaa1b3 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 18 Mar 2024 14:34:29 -0400 +Subject: [PATCH 1/2] coroutine: cap per-thread local pool size + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 234: coroutine: cap per-thread local pool size +RH-Jira: RHEL-28947 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/2] 5971de1c1e238457925bfb9c4bfc932de857b28d (stefanha/centos-stream-qemu-kvm) + +The coroutine pool implementation can hit the Linux vm.max_map_count +limit, causing QEMU to abort with "failed to allocate memory for stack" +or "failed to set up stack guard page" during coroutine creation. + +This happens because per-thread pools can grow to tens of thousands of +coroutines. Each coroutine causes 2 virtual memory areas to be created. +Eventually vm.max_map_count is reached and memory-related syscalls fail. +The per-thread pool sizes are non-uniform and depend on past coroutine +usage in each thread, so it's possible for one thread to have a large +pool while another thread's pool is empty. + +Switch to a new coroutine pool implementation with a global pool that +grows to a maximum number of coroutines and per-thread local pools that +are capped at hardcoded small number of coroutines. + +This approach does not leave large numbers of coroutines pooled in a +thread that may not use them again. In order to perform well it +amortizes the cost of global pool accesses by working in batches of +coroutines instead of individual coroutines. + +The global pool is a list. 
Threads donate batches of coroutines to when +they have too many and take batches from when they have too few: + +.-----------------------------------. +| Batch 1 | Batch 2 | Batch 3 | ... | global_pool +`-----------------------------------' + +Each thread has up to 2 batches of coroutines: + +.-------------------. +| Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches) +`-------------------' + +The goal of this change is to reduce the excessive number of pooled +coroutines that cause QEMU to abort when vm.max_map_count is reached +without losing the performance of an adequately sized coroutine pool. + +Here are virtio-blk disk I/O benchmark results: + + RW BLKSIZE IODEPTH OLD NEW CHANGE +randread 4k 1 113725 117451 +3.3% +randread 4k 8 192968 198510 +2.9% +randread 4k 16 207138 209429 +1.1% +randread 4k 32 212399 215145 +1.3% +randread 4k 64 218319 221277 +1.4% +randread 128k 1 17587 17535 -0.3% +randread 128k 8 17614 17616 +0.0% +randread 128k 16 17608 17609 +0.0% +randread 128k 32 17552 17553 +0.0% +randread 128k 64 17484 17484 +0.0% + +See files/{fio.sh,test.xml.j2} for the benchmark configuration: +https://gitlab.com/stefanha/virt-playbooks/-/tree/coroutine-pool-fix-sizing + +Buglink: https://issues.redhat.com/browse/RHEL-28947 +Reported-by: Sanjay Rao +Reported-by: Boaz Ben Shabat +Reported-by: Joe Mario +Reviewed-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240318183429.1039340-1-stefanha@redhat.com> +(cherry picked from commit 86a637e48104ae74d8be53bed6441ce32be33433) +Signed-off-by: Stefan Hajnoczi +--- + util/qemu-coroutine.c | 282 +++++++++++++++++++++++++++++++++--------- + 1 file changed, 223 insertions(+), 59 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 5fd2dbaf8b..2790959eaf 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -18,39 +18,200 @@ + #include "qemu/atomic.h" + #include "qemu/coroutine_int.h" + #include "qemu/coroutine-tls.h" ++#include "qemu/cutils.h" + 
#include "block/aio.h" + +-/** +- * The minimal batch size is always 64, coroutines from the release_pool are +- * reused as soon as there are 64 coroutines in it. The maximum pool size starts +- * with 64 and is increased on demand so that coroutines are not deleted even if +- * they are not immediately reused. +- */ + enum { +- POOL_MIN_BATCH_SIZE = 64, +- POOL_INITIAL_MAX_SIZE = 64, ++ COROUTINE_POOL_BATCH_MAX_SIZE = 128, + }; + +-/** Free list to speed up creation */ +-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +-static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; +-static unsigned int release_pool_size; ++/* ++ * Coroutine creation and deletion is expensive so a pool of unused coroutines ++ * is kept as a cache. When the pool has coroutines available, they are ++ * recycled instead of creating new ones from scratch. Coroutines are added to ++ * the pool upon termination. ++ * ++ * The pool is global but each thread maintains a small local pool to avoid ++ * global pool contention. Threads fetch and return batches of coroutines from ++ * the global pool to maintain their local pool. The local pool holds up to two ++ * batches whereas the maximum size of the global pool is controlled by the ++ * qemu_coroutine_inc_pool_size() API. ++ * ++ * .-----------------------------------. ++ * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool ++ * `-----------------------------------' ++ * ++ * .-------------------. 
++ * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches) ++ * `-------------------' ++ */ ++typedef struct CoroutinePoolBatch { ++ /* Batches are kept in a list */ ++ QSLIST_ENTRY(CoroutinePoolBatch) next; ++ ++ /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */ ++ QSLIST_HEAD(, Coroutine) list; ++ unsigned int size; ++} CoroutinePoolBatch; ++ ++typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool; ++ ++/* Host operating system limit on number of pooled coroutines */ ++static unsigned int global_pool_hard_max_size; ++ ++static QemuMutex global_pool_lock; /* protects the following variables */ ++static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool); ++static unsigned int global_pool_size; ++static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE; ++ ++QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool); ++QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier); + +-typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; +-QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); +-QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); +-QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); ++static CoroutinePoolBatch *coroutine_pool_batch_new(void) ++{ ++ CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1); ++ ++ QSLIST_INIT(&batch->list); ++ batch->size = 0; ++ return batch; ++} + +-static void coroutine_pool_cleanup(Notifier *n, void *value) ++static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch) + { + Coroutine *co; + Coroutine *tmp; +- CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); + +- QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { +- QSLIST_REMOVE_HEAD(alloc_pool, pool_next); ++ QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) { ++ QSLIST_REMOVE_HEAD(&batch->list, pool_next); + qemu_coroutine_delete(co); + } ++ g_free(batch); ++} ++ ++static void local_pool_cleanup(Notifier *n, void *value) ++{ ++ CoroutinePool *local_pool = 
get_ptr_local_pool(); ++ CoroutinePoolBatch *batch; ++ CoroutinePoolBatch *tmp; ++ ++ QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) { ++ QSLIST_REMOVE_HEAD(local_pool, next); ++ coroutine_pool_batch_delete(batch); ++ } ++} ++ ++/* Ensure the atexit notifier is registered */ ++static void local_pool_cleanup_init_once(void) ++{ ++ Notifier *notifier = get_ptr_local_pool_cleanup_notifier(); ++ if (!notifier->notify) { ++ notifier->notify = local_pool_cleanup; ++ qemu_thread_atexit_add(notifier); ++ } ++} ++ ++/* Helper to get the next unused coroutine from the local pool */ ++static Coroutine *coroutine_pool_get_local(void) ++{ ++ CoroutinePool *local_pool = get_ptr_local_pool(); ++ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); ++ Coroutine *co; ++ ++ if (unlikely(!batch)) { ++ return NULL; ++ } ++ ++ co = QSLIST_FIRST(&batch->list); ++ QSLIST_REMOVE_HEAD(&batch->list, pool_next); ++ batch->size--; ++ ++ if (batch->size == 0) { ++ QSLIST_REMOVE_HEAD(local_pool, next); ++ coroutine_pool_batch_delete(batch); ++ } ++ return co; ++} ++ ++/* Get the next batch from the global pool */ ++static void coroutine_pool_refill_local(void) ++{ ++ CoroutinePool *local_pool = get_ptr_local_pool(); ++ CoroutinePoolBatch *batch; ++ ++ WITH_QEMU_LOCK_GUARD(&global_pool_lock) { ++ batch = QSLIST_FIRST(&global_pool); ++ ++ if (batch) { ++ QSLIST_REMOVE_HEAD(&global_pool, next); ++ global_pool_size -= batch->size; ++ } ++ } ++ ++ if (batch) { ++ QSLIST_INSERT_HEAD(local_pool, batch, next); ++ local_pool_cleanup_init_once(); ++ } ++} ++ ++/* Add a batch of coroutines to the global pool */ ++static void coroutine_pool_put_global(CoroutinePoolBatch *batch) ++{ ++ WITH_QEMU_LOCK_GUARD(&global_pool_lock) { ++ unsigned int max = MIN(global_pool_max_size, ++ global_pool_hard_max_size); ++ ++ if (global_pool_size < max) { ++ QSLIST_INSERT_HEAD(&global_pool, batch, next); ++ ++ /* Overshooting the max pool size is allowed */ ++ global_pool_size += batch->size; ++ return; ++ } ++ } ++ ++ 
/* The global pool was full, so throw away this batch */ ++ coroutine_pool_batch_delete(batch); ++} ++ ++/* Get the next unused coroutine from the pool or return NULL */ ++static Coroutine *coroutine_pool_get(void) ++{ ++ Coroutine *co; ++ ++ co = coroutine_pool_get_local(); ++ if (!co) { ++ coroutine_pool_refill_local(); ++ co = coroutine_pool_get_local(); ++ } ++ return co; ++} ++ ++static void coroutine_pool_put(Coroutine *co) ++{ ++ CoroutinePool *local_pool = get_ptr_local_pool(); ++ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); ++ ++ if (unlikely(!batch)) { ++ batch = coroutine_pool_batch_new(); ++ QSLIST_INSERT_HEAD(local_pool, batch, next); ++ local_pool_cleanup_init_once(); ++ } ++ ++ if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) { ++ CoroutinePoolBatch *next = QSLIST_NEXT(batch, next); ++ ++ /* Is the local pool full? */ ++ if (next) { ++ QSLIST_REMOVE_HEAD(local_pool, next); ++ coroutine_pool_put_global(batch); ++ } ++ ++ batch = coroutine_pool_batch_new(); ++ QSLIST_INSERT_HEAD(local_pool, batch, next); ++ } ++ ++ QSLIST_INSERT_HEAD(&batch->list, co, pool_next); ++ batch->size++; + } + + Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) +@@ -58,31 +219,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + Coroutine *co = NULL; + + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { +- CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); +- +- co = QSLIST_FIRST(alloc_pool); +- if (!co) { +- if (release_pool_size > POOL_MIN_BATCH_SIZE) { +- /* Slow path; a good place to register the destructor, too. */ +- Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); +- if (!notifier->notify) { +- notifier->notify = coroutine_pool_cleanup; +- qemu_thread_atexit_add(notifier); +- } +- +- /* This is not exact; there could be a little skew between +- * release_pool_size and the actual size of release_pool. But +- * it is just a heuristic, it does not need to be perfect. 
+- */ +- set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); +- QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); +- co = QSLIST_FIRST(alloc_pool); +- } +- } +- if (co) { +- QSLIST_REMOVE_HEAD(alloc_pool, pool_next); +- set_alloc_pool_size(get_alloc_pool_size() - 1); +- } ++ co = coroutine_pool_get(); + } + + if (!co) { +@@ -100,19 +237,10 @@ static void coroutine_delete(Coroutine *co) + co->caller = NULL; + + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { +- if (release_pool_size < qatomic_read(&pool_max_size) * 2) { +- QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); +- qatomic_inc(&release_pool_size); +- return; +- } +- if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { +- QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); +- set_alloc_pool_size(get_alloc_pool_size() + 1); +- return; +- } ++ coroutine_pool_put(co); ++ } else { ++ qemu_coroutine_delete(co); + } +- +- qemu_coroutine_delete(co); + } + + void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) +@@ -223,10 +351,46 @@ AioContext *qemu_coroutine_get_aio_context(Coroutine *co) + + void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { +- qatomic_add(&pool_max_size, additional_pool_size); ++ QEMU_LOCK_GUARD(&global_pool_lock); ++ global_pool_max_size += additional_pool_size; + } + + void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { +- qatomic_sub(&pool_max_size, removing_pool_size); ++ QEMU_LOCK_GUARD(&global_pool_lock); ++ global_pool_max_size -= removing_pool_size; ++} ++ ++static unsigned int get_global_pool_hard_max_size(void) ++{ ++#ifdef __linux__ ++ g_autofree char *contents = NULL; ++ int max_map_count; ++ ++ /* ++ * Linux processes can have up to max_map_count virtual memory areas ++ * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We ++ * must limit the coroutine pool to a safe size to avoid running out of ++ * VMAs. 
++ */ ++ if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL, ++ NULL) && ++ qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) { ++ /* ++ * This is a conservative upper bound that avoids exceeding ++ * max_map_count. Leave half for non-coroutine users like library ++ * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so ++ * halve the amount again. ++ */ ++ return max_map_count / 4; ++ } ++#endif ++ ++ return UINT_MAX; ++} ++ ++static void __attribute__((constructor)) qemu_coroutine_init(void) ++{ ++ qemu_mutex_init(&global_pool_lock); ++ global_pool_hard_max_size = get_global_pool_hard_max_size(); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch b/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch new file mode 100644 index 0000000..cdbb666 --- /dev/null +++ b/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch @@ -0,0 +1,61 @@ +From 0aa65dc3acba481f7064df936ab49e3bceb1d5bd Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 20 Mar 2024 14:12:32 -0400 +Subject: [PATCH 2/2] coroutine: reserve 5,000 mappings +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 234: coroutine: cap per-thread local pool size +RH-Jira: RHEL-28947 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [2/2] 78560c2b947471111cc16c313d6f38db42860a1c (stefanha/centos-stream-qemu-kvm) + +Daniel P. Berrangé pointed out that the coroutine +pool size heuristic is very conservative. Instead of halving +max_map_count, he suggested reserving 5,000 mappings for non-coroutine +users based on observations of guests he has access to. + +Fixes: 86a637e48104 ("coroutine: cap per-thread local pool size") +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. 
Berrangé +Message-id: 20240320181232.1464819-1-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 9352f80cd926fe2dde7c89b93ee33bb0356ff40e) +Signed-off-by: Stefan Hajnoczi +--- + util/qemu-coroutine.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 2790959eaf..eb4eebefdf 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -377,12 +377,17 @@ static unsigned int get_global_pool_hard_max_size(void) + NULL) && + qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) { + /* +- * This is a conservative upper bound that avoids exceeding +- * max_map_count. Leave half for non-coroutine users like library +- * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so +- * halve the amount again. ++ * This is an upper bound that avoids exceeding max_map_count. Leave a ++ * fixed amount for non-coroutine users like library dependencies, ++ * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the ++ * remaining amount. 
+ */ +- return max_map_count / 4; ++ if (max_map_count > 5000) { ++ return (max_map_count - 5000) / 2; ++ } else { ++ /* Disable the global pool but threads still have local pools */ ++ return 0; ++ } + } + #endif + +-- +2.39.3 + diff --git a/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch b/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch new file mode 100644 index 0000000..735f2a3 --- /dev/null +++ b/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch @@ -0,0 +1,75 @@ +From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Dec 2023 11:42:59 -0500 +Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm) + +Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs +dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb() +to avoid a race with scsi_device_purge_requests() running in the main +loop thread. + +The SCSI code no longer calls dma_aio_cancel() from the main loop thread +while I/O is running in the IOThread AioContext. Therefore it is no +longer necessary to take this lock to protect DMAAIOCB fields. The +->cb() function also does not require the lock because blk_aio_*() and +friends do not need the AioContext lock. + +Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't +rely on it taking the AioContext lock, so this change is safe. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231204164259.1515217-5-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + system/dma-helpers.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/system/dma-helpers.c b/system/dma-helpers.c +index 36211acc7e..528117f256 100644 +--- a/system/dma-helpers.c ++++ b/system/dma-helpers.c +@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret) + + trace_dma_blk_cb(dbs, ret); + +- aio_context_acquire(ctx); + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + + if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { + dma_complete(dbs, ret); +- goto out; ++ return; + } + dma_blk_unmap(dbs); + +@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret) + trace_dma_map_wait(dbs); + dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); + cpu_register_map_client(dbs->bh); +- goto out; ++ return; + } + + if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { +@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret) + dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, + dma_blk_cb, dbs, dbs->io_func_opaque); + assert(dbs->acb); +-out: +- aio_context_release(ctx); + } + + static void dma_aio_cancel(BlockAIOCB *acb) +-- +2.39.3 + diff --git a/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch b/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch new file mode 100644 index 0000000..dbe48d7 --- /dev/null +++ b/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch @@ -0,0 +1,228 @@ +From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:26 +0800 +Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: 
Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm) + +Suggested-by: Cédric Le Goater +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8) +Signed-off-by: Eric Auger +--- + MAINTAINERS | 1 + + docs/devel/index-internals.rst | 1 + + docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++ + 3 files changed, 168 insertions(+) + create mode 100644 docs/devel/vfio-iommufd.rst + +diff --git a/MAINTAINERS b/MAINTAINERS +index ca70bb4e64..0ddb20a35f 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2176,6 +2176,7 @@ F: backends/iommufd.c + F: include/sysemu/iommufd.h + F: include/qemu/chardev_open.h + F: util/chardev_open.c ++F: docs/devel/vfio-iommufd.rst + + vhost + M: Michael S. Tsirkin +diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst +index 6f81df92bc..3def4a138b 100644 +--- a/docs/devel/index-internals.rst ++++ b/docs/devel/index-internals.rst +@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them. + s390-dasd-ipl + tracing + vfio-migration ++ vfio-iommufd + writing-monitor-commands + virtio-backends +diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst +new file mode 100644 +index 0000000000..3d1c11f175 +--- /dev/null ++++ b/docs/devel/vfio-iommufd.rst +@@ -0,0 +1,166 @@ ++=============================== ++IOMMUFD BACKEND usage with VFIO ++=============================== ++ ++(Same meaning for backend/container/BE) ++ ++With the introduction of iommufd, the Linux kernel provides a generic ++interface for user space drivers to propagate their DMA mappings to kernel ++for assigned devices. While the legacy kernel interface is group-centric, ++the new iommufd interface is device-centric, relying on device fd and iommufd. 
++ ++To support both interfaces in the QEMU VFIO device, introduce a base container ++to abstract the common part of VFIO legacy and iommufd container. So that the ++generic VFIO code can use either container. ++ ++The base container implements generic functions such as memory_listener and ++address space management whereas the derived container implements callbacks ++specific to either legacy or iommufd. Each container has its own way to setup ++secure context and dma management interface. The below diagram shows how it ++looks like with both containers. ++ ++:: ++ ++ VFIO AddressSpace/Memory ++ +-------+ +----------+ +-----+ +-----+ ++ | pci | | platform | | ap | | ccw | ++ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+ ++ | | | | | AddressSpace | ++ | | | | +------------+---------+ ++ +---V-----------V-----------V--------V----+ / ++ | VFIOAddressSpace | <------------+ ++ | | | MemoryListener ++ | VFIOContainerBase list | ++ +-------+----------------------------+----+ ++ | | ++ | | ++ +-------V------+ +--------V----------+ ++ | iommufd | | vfio legacy | ++ | container | | container | ++ +-------+------+ +--------+----------+ ++ | | ++ | /dev/iommu | /dev/vfio/vfio ++ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id ++ Userspace | | ++ ============+============================+=========================== ++ Kernel | device fd | ++ +---------------+ | group/container fd ++ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU) ++ | ATTACH_IOAS) | | device fd ++ | | | ++ | +-------V------------V-----------------+ ++ iommufd | | vfio | ++ (map/unmap | +---------+--------------------+-------+ ++ ioas_copy) | | | map/unmap ++ | | | ++ +------V------+ +-----V------+ +------V--------+ ++ | iommfd core | | device | | vfio iommu | ++ +-------------+ +------------+ +---------------+ ++ ++* Secure Context setup ++ ++ - iommufd BE: uses device fd and iommufd to setup secure context ++ (bind_iommufd, attach_ioas) ++ - vfio legacy BE: uses group fd and container fd to 
setup secure context ++ (set_container, set_iommu) ++ ++* Device access ++ ++ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX`` ++ - vfio legacy BE: device fd is retrieved from group fd ioctl ++ ++* DMA Mapping flow ++ ++ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener ++ 2. VFIO populates DMA map/unmap via the container BEs ++ * iommufd BE: uses iommufd ++ * vfio legacy BE: uses container fd ++ ++Example configuration ++===================== ++ ++Step 1: configure the host device ++--------------------------------- ++ ++It's exactly same as the VFIO device with legacy VFIO container. ++ ++Step 2: configure QEMU ++---------------------- ++ ++Interactions with the ``/dev/iommu`` are abstracted by a new iommufd ++object (compiled in with the ``CONFIG_IOMMUFD`` option). ++ ++Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must ++be linked with an iommufd object. It gets a new optional property ++named iommufd which allows to pass an iommufd object. Take ``vfio-pci`` ++device for example: ++ ++.. code-block:: bash ++ ++ -object iommufd,id=iommufd0 ++ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 ++ ++Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a ++management layer. In such a case the fd is passed, the fd supports a ++string naming the fd or a number, for example: ++ ++.. code-block:: bash ++ ++ -object iommufd,id=iommufd0,fd=22 ++ -device vfio-pci,iommufd=iommufd0,fd=23 ++ ++If the ``fd`` property is not passed, the fd is opened by QEMU. ++ ++If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd ++is not used and the user gets the behavior based on the legacy VFIO ++container: ++ ++.. code-block:: bash ++ ++ -device vfio-pci,host=0000:02:00.0 ++ ++Supported platform ++================== ++ ++Supports x86, ARM and s390x currently. 
++ ++Caveats ++======= ++ ++Dirty page sync ++--------------- ++ ++Dirty page sync with the iommufd backend is not supported yet, so live migration is ++disabled by default. It can be force-enabled as shown below, although this is ++inefficient. ++ ++.. code-block:: bash ++ ++ -object iommufd,id=iommufd0 ++ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on ++ ++P2P DMA ++------- ++ ++PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI ++BAR regions yet. The warning below is shown for an assigned PCI device; it's not a bug. ++ ++.. code-block:: none ++ ++ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR? ++ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address) ++ ++FD passing with mdev ++-------------------- ++ ++The ``vfio-pci`` device checks the sysfsdev property to decide if the backend is an mdev. ++If FD passing is used, there is no way to know that and the mdev is treated ++like a real PCI device. An error like the one below is reported if the user tries to enable ++RAM discarding for the mdev. ++ ++.. code-block:: none ++ ++ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices ++ ++``vfio-ap`` and ``vfio-ccw`` devices don't have the same issue as their backend ++devices are always mdev and RAM discarding is force enabled.
+-- +2.39.3 + diff --git a/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch b/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch new file mode 100644 index 0000000..80adc69 --- /dev/null +++ b/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch @@ -0,0 +1,98 @@ +From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:08 -0500 +Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm) + +Encourage the use of locking primitives and stop mentioning the +AioContext lock since it is being removed. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-12-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + docs/devel/multiple-iothreads.txt | 47 +++++++++++-------------------- + 1 file changed, 16 insertions(+), 31 deletions(-) + +diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt +index a3e949f6b3..4865196bde 100644 +--- a/docs/devel/multiple-iothreads.txt ++++ b/docs/devel/multiple-iothreads.txt +@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in. + + How to synchronize with an IOThread + ----------------------------------- +-AioContext is not thread-safe so some rules must be followed when using file +-descriptors, event notifiers, timers, or BHs across threads: ++Variables that can be accessed by multiple threads require some form of ++synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc. + +-1. AioContext functions can always be called safely. They handle their +-own locking internally. +- +-2. 
Other threads wishing to access the AioContext must use +-aio_context_acquire()/aio_context_release() for mutual exclusion. Once the +-context is acquired no other thread can access it or run event loop iterations +-in this AioContext. +- +-Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls. +-Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro +-used in the block layer and can lead to hangs. +- +-There is currently no lock ordering rule if a thread needs to acquire multiple +-AioContexts simultaneously. Therefore, it is only safe for code holding the +-QEMU global mutex to acquire other AioContexts. ++AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(), ++aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger ++activity in an IOThread. + + Side note: the best way to schedule a function call across threads is to call +-aio_bh_schedule_oneshot(). No acquire/release or locking is needed. ++aio_bh_schedule_oneshot(). ++ ++The main loop thread can wait synchronously for a condition using ++AIO_WAIT_WHILE(). + + AioContext and the block layer + ------------------------------ +@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using + old APIs that implicitly use the main loop. See the "How to program for + IOThreads" above for information on how to do that. + +-If main loop code such as a QMP function wishes to access a BlockDriverState +-it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure +-that callbacks in the IOThread do not run in parallel. +- + Code running in the monitor typically needs to ensure that past + requests from the guest are completed. When a block device is running + in an IOThread, the IOThread can also process requests from the guest + (via ioeventfd). To achieve both objects, wrap the code between + bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained +-section". 
The functions must be called between aio_context_acquire() +-and aio_context_release(). You can freely release and re-acquire the +-AioContext within a drained section. +- +-Long-running jobs (usually in the form of coroutines) are best scheduled in +-the BlockDriverState's AioContext to avoid the need to acquire/release around +-each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier, +-or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends, +-can be used to get a notification whenever bdrv_try_change_aio_context() moves a ++section". ++ ++Long-running jobs (usually in the form of coroutines) are often scheduled in ++the BlockDriverState's AioContext. The functions ++bdrv_add/remove_aio_context_notifier, or alternatively ++blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to ++get a notification whenever bdrv_try_change_aio_context() moves a + BlockDriverState to a different AioContext. +-- +2.39.3 + diff --git a/SOURCES/kvm-dump-Add-arch-cleanup-function.patch b/SOURCES/kvm-dump-Add-arch-cleanup-function.patch deleted file mode 100644 index 99502d5..0000000 --- a/SOURCES/kvm-dump-Add-arch-cleanup-function.patch +++ /dev/null @@ -1,69 +0,0 @@ -From d032e43c4cebdbeb279d2da9b514fa50c6ed4da3 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 21 Nov 2023 16:36:26 +0100 -Subject: [PATCH 2/3] dump: Add arch cleanup function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 325: Fix problem that secure execution guest remains in "paused" state after dump failure -RH-Jira: RHEL-16997 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] d70fcc72a69f65432f2fbfb7d864452ea37ec25d - -JIRA: https://issues.redhat.com/browse/RHEL-16997 - -commit e72629e5149aba6f44122ea6d2a803ef136a0c6b -Author: Janosch Frank -Date: Thu Nov 9 12:04:42 2023 +0000 - - dump: Add arch cleanup function - - Some 
architectures (s390x) need to cleanup after a failed dump to be - able to continue to run the vm. Add a cleanup function pointer and - call it if it's set. - - Signed-off-by: Janosch Frank - Reviewed-by: Thomas Huth - Reviewed-by: Marc-André Lureau - Message-ID: <20231109120443.185979-3-frankja@linux.ibm.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - dump/dump.c | 4 ++++ - include/sysemu/dump-arch.h | 1 + - 2 files changed, 5 insertions(+) - -diff --git a/dump/dump.c b/dump/dump.c -index 1f1a6edcab..6a50e85f49 100644 ---- a/dump/dump.c -+++ b/dump/dump.c -@@ -96,6 +96,10 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val) - - static int dump_cleanup(DumpState *s) - { -+ if (s->dump_info.arch_cleanup_fn) { -+ s->dump_info.arch_cleanup_fn(s); -+ } -+ - guest_phys_blocks_free(&s->guest_phys_blocks); - memory_mapping_list_free(&s->list); - close(s->fd); -diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h -index 59bbc9be38..743916e46c 100644 ---- a/include/sysemu/dump-arch.h -+++ b/include/sysemu/dump-arch.h -@@ -24,6 +24,7 @@ typedef struct ArchDumpInfo { - void (*arch_sections_add_fn)(DumpState *s); - uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff); - int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff); -+ void (*arch_cleanup_fn)(DumpState *s); - } ArchDumpInfo; - - struct GuestPhysBlockList; /* memory_mapping.h */ --- -2.39.3 - diff --git a/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch b/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch deleted file mode 100644 index 77086e5..0000000 --- a/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:32 +0200 -Subject: [PATCH 02/21] graph-lock: Disable locking for now - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric 
Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm) - -In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They -come from callers that hold an AioContext lock, which is not allowed -during polling. In theory, we could temporarily release the lock, but -callers are inconsistent about whether they hold a lock, and if they do, -some are also confused about which one they hold. While all of this is -fixable, it's not trivial, and the best course of action for 8.0.1 is -probably just disabling the graph locking code temporarily. - -We don't currently rely on graph locking yet. It is supposed to replace -the AioContext lock eventually to enable multiqueue support, but as long -as we still have the AioContext lock, it is sufficient without the graph -lock. Once the AioContext lock goes away, the deadlock doesn't exist any -more either and this commit can be reverted. (Of course, it can also be -reverted while the AioContext lock still exists if the callers have been -fixed.) - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-2-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d) -Signed-off-by: Kevin Wolf ---- - block/graph-lock.c | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/block/graph-lock.c b/block/graph-lock.c -index 259a7a0bde..2490926c90 100644 ---- a/block/graph-lock.c -+++ b/block/graph-lock.c -@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; - /* Protects the list of aiocontext and orphaned_reader_count */ - static QemuMutex aio_context_list_lock; - -+#if 0 - /* Written and read with atomic operations. */ - static int has_writer; -+#endif - - /* - * A reader coroutine could move from an AioContext to another. 
-@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) - g_free(ctx->bdrv_graph); - } - -+#if 0 - static uint32_t reader_count(void) - { - BdrvGraphRWlock *brdv_graph; -@@ -105,10 +108,17 @@ static uint32_t reader_count(void) - assert((int32_t)rd >= 0); - return rd; - } -+#endif - - void bdrv_graph_wrlock(void) - { - GLOBAL_STATE_CODE(); -+ /* -+ * TODO Some callers hold an AioContext lock when this is called, which -+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or -+ * AioContext locks are gone). -+ */ -+#if 0 - assert(!qatomic_read(&has_writer)); - - /* Make sure that constantly arriving new I/O doesn't cause starvation */ -@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) - } while (reader_count() >= 1); - - bdrv_drain_all_end(); -+#endif - } - - void bdrv_graph_wrunlock(void) - { - GLOBAL_STATE_CODE(); -+#if 0 - QEMU_LOCK_GUARD(&aio_context_list_lock); - assert(qatomic_read(&has_writer)); - -@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void) - - /* Wake up all coroutine that are waiting to read the graph */ - qemu_co_enter_all(&reader_queue, &aio_context_list_lock); -+#endif - } - - void coroutine_fn bdrv_graph_co_rdlock(void) - { -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - BdrvGraphRWlock *bdrv_graph; - bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - -@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void) - qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); - } - } -+#endif - } - - void coroutine_fn bdrv_graph_co_rdunlock(void) - { -+#if 0 - BdrvGraphRWlock *bdrv_graph; - bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - -@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) - if (qatomic_read(&has_writer)) { - aio_wait_kick(); - } -+#endif - } - - void bdrv_graph_rdlock_main_loop(void) -@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void) - void assert_bdrv_graph_readable(void) - { - /* reader_count() is slow due to aio_context_list_lock lock 
contention */ -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - #ifdef CONFIG_DEBUG_GRAPH_LOCK - assert(qemu_in_main_thread() || reader_count()); - #endif -+#endif - } - - void assert_bdrv_graph_writable(void) - { - assert(qemu_in_main_thread()); -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - assert(qatomic_read(&has_writer)); -+#endif - } --- -2.39.3 - diff --git a/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch b/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch new file mode 100644 index 0000000..2fff9ba --- /dev/null +++ b/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch @@ -0,0 +1,1190 @@ +From 57d96b5774fab588c6bb6812ef8ef281ffe018d7 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:02 -0500 +Subject: [PATCH 085/101] graph-lock: remove AioContext locking + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [16/26] 9575a8b834aaaa03abaf869e96f0808172e87824 (kmwolf/centos-qemu-kvm) + +Stop acquiring/releasing the AioContext lock in +bdrv_graph_wrlock()/bdrv_graph_unlock() since the lock no longer has any +effect. + +The distinction between bdrv_graph_wrunlock() and +bdrv_graph_wrunlock_ctx() becomes meaningless and they can be collapsed +into one function. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231205182011.1976568-6-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + block.c | 50 +++++++++++++++--------------- + block/backup.c | 4 +-- + block/blklogwrites.c | 8 ++--- + block/blkverify.c | 4 +-- + block/block-backend.c | 11 +++---- + block/commit.c | 16 +++++----- + block/graph-lock.c | 44 ++------------------------ + block/mirror.c | 22 ++++++------- + block/qcow2.c | 4 +-- + block/quorum.c | 8 ++--- + block/replication.c | 14 ++++----- + block/snapshot.c | 4 +-- + block/stream.c | 12 +++---- + block/vmdk.c | 20 ++++++------ + blockdev.c | 8 ++--- + blockjob.c | 12 +++---- + include/block/graph-lock.h | 21 ++----------- + scripts/block-coroutine-wrapper.py | 4 +-- + tests/unit/test-bdrv-drain.c | 40 ++++++++++++------------ + tests/unit/test-bdrv-graph-mod.c | 20 ++++++------ + 20 files changed, 133 insertions(+), 193 deletions(-) + +diff --git a/block.c b/block.c +index bfb0861ec6..25e1ebc606 100644 +--- a/block.c ++++ b/block.c +@@ -1708,12 +1708,12 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + open_failed: + bs->drv = NULL; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + if (bs->file != NULL) { + bdrv_unref_child(bs, bs->file); + assert(!bs->file); + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + g_free(bs->opaque); + bs->opaque = NULL; +@@ -3575,9 +3575,9 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + + bdrv_ref(drain_bs); + bdrv_drained_begin(drain_bs); +- bdrv_graph_wrlock(backing_hd); ++ bdrv_graph_wrlock(); + ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); +- bdrv_graph_wrunlock(backing_hd); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(drain_bs); + bdrv_unref(drain_bs); + +@@ -3790,13 +3790,13 @@ BdrvChild *bdrv_open_child(const char *filename, + return NULL; + } + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + ctx = 
bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, + errp); + aio_context_release(ctx); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + return child; + } +@@ -4650,9 +4650,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + aio_context_release(ctx); + } + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + tran_commit(tran); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + BlockDriverState *bs = bs_entry->state.bs; +@@ -4669,9 +4669,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + goto cleanup; + + abort: +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + tran_abort(tran); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (bs_entry->prepared) { +@@ -4852,12 +4852,12 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + } + + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(new_child_bs); ++ bdrv_graph_wrlock(); + + ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, + tran, errp); + +- bdrv_graph_wrunlock_ctx(ctx); ++ bdrv_graph_wrunlock(); + + if (old_ctx != ctx) { + aio_context_release(ctx); +@@ -5209,14 +5209,14 @@ static void bdrv_close(BlockDriverState *bs) + bs->drv = NULL; + } + +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + bdrv_unref_child(bs, child); + } + + assert(!bs->backing); + assert(!bs->file); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + + g_free(bs->opaque); + bs->opaque = NULL; +@@ -5509,9 +5509,9 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) + bdrv_graph_rdunlock_main_loop(); + + bdrv_drained_begin(child_bs); +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + ret = bdrv_replace_node_common(bs, child_bs, true, true, errp); +- bdrv_graph_wrunlock(bs); 
++ bdrv_graph_wrunlock(); + bdrv_drained_end(child_bs); + + return ret; +@@ -5561,7 +5561,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + aio_context_acquire(old_context); + new_context = NULL; + +- bdrv_graph_wrlock(bs_top); ++ bdrv_graph_wrlock(); + + child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", + &child_of_bds, bdrv_backing_role(bs_new), +@@ -5593,7 +5593,7 @@ out: + tran_finalize(tran, ret); + + bdrv_refresh_limits(bs_top, NULL, NULL); +- bdrv_graph_wrunlock(bs_top); ++ bdrv_graph_wrunlock(); + + bdrv_drained_end(bs_top); + bdrv_drained_end(bs_new); +@@ -5620,7 +5620,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + bdrv_ref(old_bs); + bdrv_drained_begin(old_bs); + bdrv_drained_begin(new_bs); +- bdrv_graph_wrlock(new_bs); ++ bdrv_graph_wrlock(); + + bdrv_replace_child_tran(child, new_bs, tran); + +@@ -5631,7 +5631,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + + tran_finalize(tran, ret); + +- bdrv_graph_wrunlock(new_bs); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(old_bs); + bdrv_drained_end(new_bs); + bdrv_unref(old_bs); +@@ -5718,9 +5718,9 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, + bdrv_ref(bs); + bdrv_drained_begin(bs); + bdrv_drained_begin(new_node_bs); +- bdrv_graph_wrlock(new_node_bs); ++ bdrv_graph_wrlock(); + ret = bdrv_replace_node(bs, new_node_bs, errp); +- bdrv_graph_wrunlock(new_node_bs); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(new_node_bs); + bdrv_drained_end(bs); + bdrv_unref(bs); +@@ -5975,7 +5975,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + + bdrv_ref(top); + bdrv_drained_begin(base); +- bdrv_graph_wrlock(base); ++ bdrv_graph_wrlock(); + + if (!top->drv || !base->drv) { + goto exit_wrlock; +@@ -6015,7 +6015,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + * That's a FIXME. 
+ */ + bdrv_replace_node_common(top, base, false, false, &local_err); +- bdrv_graph_wrunlock(base); ++ bdrv_graph_wrunlock(); + + if (local_err) { + error_report_err(local_err); +@@ -6052,7 +6052,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + goto exit; + + exit_wrlock: +- bdrv_graph_wrunlock(base); ++ bdrv_graph_wrunlock(); + exit: + bdrv_drained_end(base); + bdrv_unref(top); +diff --git a/block/backup.c b/block/backup.c +index 8aae5836d7..ec29d6b810 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -496,10 +496,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + block_copy_set_speed(bcs, speed); + + /* Required permissions are taken by copy-before-write filter target */ +- bdrv_graph_wrlock(target); ++ bdrv_graph_wrlock(); + block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); +- bdrv_graph_wrunlock(target); ++ bdrv_graph_wrunlock(); + + return &job->common; + +diff --git a/block/blklogwrites.c b/block/blklogwrites.c +index 3678f6cf42..7207b2e757 100644 +--- a/block/blklogwrites.c ++++ b/block/blklogwrites.c +@@ -251,9 +251,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, + ret = 0; + fail_log: + if (ret < 0) { +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->log_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + s->log_file = NULL; + } + fail: +@@ -265,10 +265,10 @@ static void blk_log_writes_close(BlockDriverState *bs) + { + BDRVBlkLogWritesState *s = bs->opaque; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + static int64_t coroutine_fn GRAPH_RDLOCK +diff --git a/block/blkverify.c b/block/blkverify.c +index 9b17c46644..ec45d8335e 100644 +--- a/block/blkverify.c ++++ b/block/blkverify.c +@@ -151,10 +151,10 @@ static void blkverify_close(BlockDriverState *bs) + { 
+ BDRVBlkverifyState *s = bs->opaque; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->test_file); + s->test_file = NULL; +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + static int64_t coroutine_fn GRAPH_RDLOCK +diff --git a/block/block-backend.c b/block/block-backend.c +index ec21148806..abac4e0235 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -889,7 +889,6 @@ void blk_remove_bs(BlockBackend *blk) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + BdrvChild *root; +- AioContext *ctx; + + GLOBAL_STATE_CODE(); + +@@ -919,10 +918,9 @@ void blk_remove_bs(BlockBackend *blk) + root = blk->root; + blk->root = NULL; + +- ctx = bdrv_get_aio_context(root->bs); +- bdrv_graph_wrlock(root->bs); ++ bdrv_graph_wrlock(); + bdrv_root_unref_child(root); +- bdrv_graph_wrunlock_ctx(ctx); ++ bdrv_graph_wrunlock(); + } + + /* +@@ -933,16 +931,15 @@ void blk_remove_bs(BlockBackend *blk) + int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; +- AioContext *ctx = bdrv_get_aio_context(bs); + + GLOBAL_STATE_CODE(); + bdrv_ref(bs); +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + blk->perm, blk->shared_perm, + blk, errp); +- bdrv_graph_wrunlock_ctx(ctx); ++ bdrv_graph_wrunlock(); + if (blk->root == NULL) { + return -EPERM; + } +diff --git a/block/commit.c b/block/commit.c +index 69cc75be0c..1dd7a65ffb 100644 +--- a/block/commit.c ++++ b/block/commit.c +@@ -100,9 +100,9 @@ static void commit_abort(Job *job) + bdrv_graph_rdunlock_main_loop(); + + bdrv_drained_begin(commit_top_backing_bs); +- bdrv_graph_wrlock(commit_top_backing_bs); ++ bdrv_graph_wrlock(); + bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort); +- bdrv_graph_wrunlock(commit_top_backing_bs); ++ bdrv_graph_wrunlock(); + 
bdrv_drained_end(commit_top_backing_bs); + + bdrv_unref(s->commit_top_bs); +@@ -339,7 +339,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, + * this is the responsibility of the interface (i.e. whoever calls + * commit_start()). + */ +- bdrv_graph_wrlock(top); ++ bdrv_graph_wrlock(); + s->base_overlay = bdrv_find_overlay(top, base); + assert(s->base_overlay); + +@@ -370,19 +370,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + iter_shared_perms, errp); + if (ret < 0) { +- bdrv_graph_wrunlock(top); ++ bdrv_graph_wrunlock(); + goto fail; + } + } + + if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { +- bdrv_graph_wrunlock(top); ++ bdrv_graph_wrunlock(); + goto fail; + } + s->chain_frozen = true; + + ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); +- bdrv_graph_wrunlock(top); ++ bdrv_graph_wrunlock(); + + if (ret < 0) { + goto fail; +@@ -434,9 +434,9 @@ fail: + * otherwise this would fail because of lack of permissions. */ + if (commit_top_bs) { + bdrv_drained_begin(top); +- bdrv_graph_wrlock(top); ++ bdrv_graph_wrlock(); + bdrv_replace_node(commit_top_bs, top, &error_abort); +- bdrv_graph_wrunlock(top); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(top); + } + } +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 079e878d9b..c81162b147 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -106,27 +106,12 @@ static uint32_t reader_count(void) + return rd; + } + +-void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs) ++void no_coroutine_fn bdrv_graph_wrlock(void) + { +- AioContext *ctx = NULL; +- + GLOBAL_STATE_CODE(); + assert(!qatomic_read(&has_writer)); + assert(!qemu_in_coroutine()); + +- /* +- * Release only non-mainloop AioContext. The mainloop often relies on the +- * BQL and doesn't lock the main AioContext before doing things. 
+- */ +- if (bs) { +- ctx = bdrv_get_aio_context(bs); +- if (ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } else { +- ctx = NULL; +- } +- } +- + /* Make sure that constantly arriving new I/O doesn't cause starvation */ + bdrv_drain_all_begin_nopoll(); + +@@ -155,27 +140,13 @@ void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs) + } while (reader_count() >= 1); + + bdrv_drain_all_end(); +- +- if (ctx) { +- aio_context_acquire(bdrv_get_aio_context(bs)); +- } + } + +-void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx) ++void no_coroutine_fn bdrv_graph_wrunlock(void) + { + GLOBAL_STATE_CODE(); + assert(qatomic_read(&has_writer)); + +- /* +- * Release only non-mainloop AioContext. The mainloop often relies on the +- * BQL and doesn't lock the main AioContext before doing things. +- */ +- if (ctx && ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } else { +- ctx = NULL; +- } +- + WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) { + /* + * No need for memory barriers, this works in pair with +@@ -197,17 +168,6 @@ void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx) + * progress. + */ + aio_bh_poll(qemu_get_aio_context()); +- +- if (ctx) { +- aio_context_acquire(ctx); +- } +-} +- +-void no_coroutine_fn bdrv_graph_wrunlock(BlockDriverState *bs) +-{ +- AioContext *ctx = bs ? bdrv_get_aio_context(bs) : NULL; +- +- bdrv_graph_wrunlock_ctx(ctx); + } + + void coroutine_fn bdrv_graph_co_rdlock(void) +diff --git a/block/mirror.c b/block/mirror.c +index cd9d3ad4a8..51f9e2f17c 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -764,7 +764,7 @@ static int mirror_exit_common(Job *job) + * check for an op blocker on @to_replace, and we have our own + * there. 
+ */ +- bdrv_graph_wrlock(target_bs); ++ bdrv_graph_wrlock(); + if (bdrv_recurse_can_replace(src, to_replace)) { + bdrv_replace_node(to_replace, target_bs, &local_err); + } else { +@@ -773,7 +773,7 @@ static int mirror_exit_common(Job *job) + "would not lead to an abrupt change of visible data", + to_replace->node_name, target_bs->node_name); + } +- bdrv_graph_wrunlock(target_bs); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(to_replace); + if (local_err) { + error_report_err(local_err); +@@ -796,9 +796,9 @@ static int mirror_exit_common(Job *job) + * valid. + */ + block_job_remove_all_bdrv(bjob); +- bdrv_graph_wrlock(mirror_top_bs); ++ bdrv_graph_wrlock(); + bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); +- bdrv_graph_wrunlock(mirror_top_bs); ++ bdrv_graph_wrunlock(); + + bdrv_drained_end(target_bs); + bdrv_unref(target_bs); +@@ -1914,13 +1914,13 @@ static BlockJob *mirror_start_job( + */ + bdrv_disable_dirty_bitmap(s->dirty_bitmap); + +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + ret = block_job_add_bdrv(&s->common, "source", bs, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | + BLK_PERM_CONSISTENT_READ, + errp); + if (ret < 0) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + +@@ -1965,17 +1965,17 @@ static BlockJob *mirror_start_job( + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + iter_shared_perms, errp); + if (ret < 0) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + } + + if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + } +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + + QTAILQ_INIT(&s->ops_in_flight); + +@@ -2001,12 +2001,12 @@ fail: + + bs_opaque->stop = true; + bdrv_drained_begin(bs); +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + assert(mirror_top_bs->backing->bs == bs); + bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, + 
&error_abort); + bdrv_replace_node(mirror_top_bs, bs, &error_abort); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(bs); + + bdrv_unref(mirror_top_bs); +diff --git a/block/qcow2.c b/block/qcow2.c +index 7968735346..d91b7b91d3 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2813,9 +2813,9 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) + if (close_data_file && has_data_file(bs)) { + GLOBAL_STATE_CODE(); + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->data_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + s->data_file = NULL; + bdrv_graph_rdlock_main_loop(); + } +diff --git a/block/quorum.c b/block/quorum.c +index 505b8b3e18..db8fe891c4 100644 +--- a/block/quorum.c ++++ b/block/quorum.c +@@ -1037,14 +1037,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, + + close_exit: + /* cleanup on error */ +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + for (i = 0; i < s->num_children; i++) { + if (!opened[i]) { + continue; + } + bdrv_unref_child(bs, s->children[i]); + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + g_free(s->children); + g_free(opened); + exit: +@@ -1057,11 +1057,11 @@ static void quorum_close(BlockDriverState *bs) + BDRVQuorumState *s = bs->opaque; + int i; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + for (i = 0; i < s->num_children; i++) { + bdrv_unref_child(bs, s->children[i]); + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + g_free(s->children); + } +diff --git a/block/replication.c b/block/replication.c +index 5ded5f1ca9..424b537ff7 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -560,7 +560,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + return; + } + +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + + bdrv_ref(hidden_disk->bs); + s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", +@@ 
-568,7 +568,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + &local_err); + if (local_err) { + error_propagate(errp, local_err); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + aio_context_release(aio_context); + return; + } +@@ -579,7 +579,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + BDRV_CHILD_DATA, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + aio_context_release(aio_context); + return; + } +@@ -592,7 +592,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (!top_bs || !bdrv_is_root_node(top_bs) || + !check_top_bs(top_bs, bs)) { + error_setg(errp, "No top_bs or it is invalid"); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + reopen_backing_file(bs, false, NULL); + aio_context_release(aio_context); + return; +@@ -600,7 +600,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + bdrv_op_block_all(top_bs, s->blocker); + bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); + +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + + s->backup_job = backup_job_create( + NULL, s->secondary_disk->bs, s->hidden_disk->bs, +@@ -691,12 +691,12 @@ static void replication_done(void *opaque, int ret) + if (ret == 0) { + s->stage = BLOCK_REPLICATION_DONE; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->secondary_disk); + s->secondary_disk = NULL; + bdrv_unref_child(bs, s->hidden_disk); + s->hidden_disk = NULL; +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + s->error = 0; + } else { +diff --git a/block/snapshot.c b/block/snapshot.c +index ec8cf4810b..e486d3e205 100644 +--- a/block/snapshot.c ++++ b/block/snapshot.c +@@ -290,9 +290,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs, + } + + /* .bdrv_open() will re-attach it */ +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, fallback); +- 
bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); + open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); +diff --git a/block/stream.c b/block/stream.c +index 01fe7c0f16..048c2d282f 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -99,9 +99,9 @@ static int stream_prepare(Job *job) + } + } + +- bdrv_graph_wrlock(s->target_bs); ++ bdrv_graph_wrlock(); + bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); +- bdrv_graph_wrunlock(s->target_bs); ++ bdrv_graph_wrunlock(); + + /* + * This call will do I/O, so the graph can change again from here on. +@@ -366,10 +366,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, + * already have our own plans. Also don't allow resize as the image size is + * queried only at the job start and then cached. + */ +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + if (block_job_add_bdrv(&s->common, "active node", bs, 0, + basic_flags | BLK_PERM_WRITE, errp)) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + +@@ -389,11 +389,11 @@ void stream_start(const char *job_id, BlockDriverState *bs, + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + basic_flags, errp); + if (ret < 0) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + } +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + + s->base_overlay = base_overlay; + s->above_base = above_base; +diff --git a/block/vmdk.c b/block/vmdk.c +index d6971c7067..bf78e12383 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -272,7 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs) + BDRVVmdkState *s = bs->opaque; + VmdkExtent *e; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + for (i = 0; i < s->num_extents; i++) { + e = &s->extents[i]; + g_free(e->l1_table); +@@ -283,7 +283,7 @@ static void vmdk_free_extents(BlockDriverState *bs) + bdrv_unref_child(bs, e->file); + } + } +- bdrv_graph_wrunlock(NULL); ++ 
bdrv_graph_wrunlock(); + + g_free(s->extents); + } +@@ -1247,9 +1247,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + 0, 0, 0, 0, 0, &extent, errp); + if (ret < 0) { + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, extent_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); + goto out; + } +@@ -1266,9 +1266,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + g_free(buf); + if (ret) { + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, extent_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); + goto out; + } +@@ -1277,9 +1277,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); + if (ret) { + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, extent_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); + goto out; + } +@@ -1287,9 +1287,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + } else { + error_setg(errp, "Unsupported extent type '%s'", type); + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, extent_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); + ret = -ENOTSUP; + goto out; +diff --git a/blockdev.c b/blockdev.c +index c91f49e7b6..9e1381169d 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1611,9 +1611,9 @@ static void external_snapshot_abort(void *opaque) + } + + bdrv_drained_begin(state->new_bs); +- bdrv_graph_wrlock(state->old_bs); ++ bdrv_graph_wrlock(); + bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); +- bdrv_graph_wrunlock(state->old_bs); ++ 
bdrv_graph_wrunlock(); + bdrv_drained_end(state->new_bs); + + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ +@@ -3657,7 +3657,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, + BlockDriverState *parent_bs, *new_bs = NULL; + BdrvChild *p_child; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + + parent_bs = bdrv_lookup_bs(parent, parent, errp); + if (!parent_bs) { +@@ -3693,7 +3693,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, + } + + out: +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + BlockJobInfoList *qmp_query_block_jobs(Error **errp) +diff --git a/blockjob.c b/blockjob.c +index b7a29052b9..7310412313 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -199,7 +199,7 @@ void block_job_remove_all_bdrv(BlockJob *job) + * to process an already freed BdrvChild. + */ + aio_context_release(job->job.aio_context); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + aio_context_acquire(job->job.aio_context); + while (job->nodes) { + GSList *l = job->nodes; +@@ -212,7 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) + + g_slist_free_1(l); + } +- bdrv_graph_wrunlock_ctx(job->job.aio_context); ++ bdrv_graph_wrunlock(); + } + + bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +@@ -514,7 +514,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + int ret; + GLOBAL_STATE_CODE(); + +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + + if (job_id == NULL && !(flags & JOB_INTERNAL)) { + job_id = bdrv_get_device_name(bs); +@@ -523,7 +523,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + job = job_create(job_id, &driver->job_driver, txn, bdrv_get_aio_context(bs), + flags, cb, opaque, errp); + if (job == NULL) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + return NULL; + } + +@@ -563,11 +563,11 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + goto fail; + } + +- bdrv_graph_wrunlock(bs); 
++ bdrv_graph_wrunlock(); + return job; + + fail: +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + job_early_fail(&job->job); + return NULL; + } +diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h +index 22b5db1ed9..d7545e82d0 100644 +--- a/include/block/graph-lock.h ++++ b/include/block/graph-lock.h +@@ -110,34 +110,17 @@ void unregister_aiocontext(AioContext *ctx); + * + * The wrlock can only be taken from the main loop, with BQL held, as only the + * main loop is allowed to modify the graph. +- * +- * If @bs is non-NULL, its AioContext is temporarily released. +- * +- * This function polls. Callers must not hold the lock of any AioContext other +- * than the current one and the one of @bs. + */ + void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA +-bdrv_graph_wrlock(BlockDriverState *bs); ++bdrv_graph_wrlock(void); + + /* + * bdrv_graph_wrunlock: + * Write finished, reset global has_writer to 0 and restart + * all readers that are waiting. +- * +- * If @bs is non-NULL, its AioContext is temporarily released. +- */ +-void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA +-bdrv_graph_wrunlock(BlockDriverState *bs); +- +-/* +- * bdrv_graph_wrunlock_ctx: +- * Write finished, reset global has_writer to 0 and restart +- * all readers that are waiting. +- * +- * If @ctx is non-NULL, its lock is temporarily released. 
+ */ + void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA +-bdrv_graph_wrunlock_ctx(AioContext *ctx); ++bdrv_graph_wrunlock(void); + + /* + * bdrv_graph_co_rdlock: +diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py +index a38e5833fb..38364fa557 100644 +--- a/scripts/block-coroutine-wrapper.py ++++ b/scripts/block-coroutine-wrapper.py +@@ -261,8 +261,8 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: + graph_lock=' bdrv_graph_rdlock_main_loop();' + graph_unlock=' bdrv_graph_rdunlock_main_loop();' + elif func.graph_wrlock: +- graph_lock=' bdrv_graph_wrlock(NULL);' +- graph_unlock=' bdrv_graph_wrunlock(NULL);' ++ graph_lock=' bdrv_graph_wrlock();' ++ graph_unlock=' bdrv_graph_wrunlock();' + + return f"""\ + /* +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 704d1a3f36..d9754dfebc 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -807,9 +807,9 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + tjob->bs = src; + job = &tjob->common; + +- bdrv_graph_wrlock(target); ++ bdrv_graph_wrlock(); + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); +- bdrv_graph_wrunlock(target); ++ bdrv_graph_wrunlock(); + + switch (result) { + case TEST_JOB_SUCCESS: +@@ -991,11 +991,11 @@ static void bdrv_test_top_close(BlockDriverState *bs) + { + BdrvChild *c, *next_c; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { + bdrv_unref_child(bs, c); + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + static int coroutine_fn GRAPH_RDLOCK +@@ -1085,10 +1085,10 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, + + null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, + &error_abort); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); +- 
bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + /* This child will be the one to pass to requests through to, and + * it will stall until a drain occurs */ +@@ -1096,21 +1096,21 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, + &error_abort); + child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; + /* Takes our reference to child_bs */ +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", + &child_of_bds, + BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + /* This child is just there to be deleted + * (for detach_instead_of_delete == true) */ + null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, + &error_abort); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk, bs, &error_abort); +@@ -1193,14 +1193,14 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) + + bdrv_dec_in_flight(data->child_b->bs); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(data->parent_b, data->child_b); + + bdrv_ref(data->c); + data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C", + &child_of_bds, BDRV_CHILD_DATA, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret) +@@ -1298,7 +1298,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) + /* Set child relationships */ + bdrv_ref(b); + bdrv_ref(a); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); + child_a = bdrv_attach_child(parent_b, a, "PB-A", 
&child_of_bds, +@@ -1308,7 +1308,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) + bdrv_attach_child(parent_a, a, "PA-A", + by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class, + BDRV_CHILD_DATA, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + g_assert_cmpint(parent_a->refcnt, ==, 1); + g_assert_cmpint(parent_b->refcnt, ==, 1); +@@ -1727,7 +1727,7 @@ static void test_drop_intermediate_poll(void) + * Establish the chain last, so the chain links are the first + * elements in the BDS.parents lists + */ +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + for (i = 0; i < 3; i++) { + if (i) { + /* Takes the reference to chain[i - 1] */ +@@ -1735,7 +1735,7 @@ static void test_drop_intermediate_poll(void) + &chain_child_class, BDRV_CHILD_COW, &error_abort); + } + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + job = block_job_create("job", &test_simple_job_driver, NULL, job_node, + 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); +@@ -1982,10 +1982,10 @@ static void do_test_replace_child_mid_drain(int old_drain_count, + new_child_bs->total_sectors = 1; + + bdrv_ref(old_child_bs); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, + BDRV_CHILD_COW, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + parent_s->setup_completed = true; + + for (i = 0; i < old_drain_count; i++) { +@@ -2016,9 +2016,9 @@ static void do_test_replace_child_mid_drain(int old_drain_count, + g_assert(parent_bs->quiesce_counter == old_drain_count); + bdrv_drained_begin(old_child_bs); + bdrv_drained_begin(new_child_bs); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_replace_node(old_child_bs, new_child_bs, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(new_child_bs); + bdrv_drained_end(old_child_bs); + g_assert(parent_bs->quiesce_counter == new_drain_count); +diff --git 
a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c +index 074adcbb93..8ee6ef38d8 100644 +--- a/tests/unit/test-bdrv-graph-mod.c ++++ b/tests/unit/test-bdrv-graph-mod.c +@@ -137,10 +137,10 @@ static void test_update_perm_tree(void) + + blk_insert_bs(root, bs, &error_abort); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(filter, bs, "child", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + aio_context_acquire(qemu_get_aio_context()); + ret = bdrv_append(filter, bs, NULL); +@@ -206,11 +206,11 @@ static void test_should_update_child(void) + + bdrv_set_backing_hd(target, bs, &error_abort); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + g_assert(target->backing->bs == bs); + bdrv_attach_child(filter, target, "target", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + aio_context_acquire(qemu_get_aio_context()); + bdrv_append(filter, bs, &error_abort); + aio_context_release(qemu_get_aio_context()); +@@ -248,7 +248,7 @@ static void test_parallel_exclusive_write(void) + bdrv_ref(base); + bdrv_ref(fl1); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(top, fl1, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + &error_abort); +@@ -260,7 +260,7 @@ static void test_parallel_exclusive_write(void) + &error_abort); + + bdrv_replace_node(fl1, fl2, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + bdrv_drained_end(fl2); + bdrv_drained_end(fl1); +@@ -367,7 +367,7 @@ static void test_parallel_perm_update(void) + */ + bdrv_ref(base); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA, + &error_abort); + c_fl1 = bdrv_attach_child(ws, fl1, "first", &child_of_bds, +@@ -380,7 +380,7 @@ static void test_parallel_perm_update(void) + bdrv_attach_child(fl2, base, "backing", 
&child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + /* Select fl1 as first child to be active */ + s->selected = c_fl1; +@@ -434,11 +434,11 @@ static void test_append_greedy_filter(void) + BlockDriverState *base = no_perm_node("base"); + BlockDriverState *fl = exclusive_writer_node("fl1"); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(top, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + aio_context_acquire(qemu_get_aio_context()); + bdrv_append(fl, base, &error_abort); +-- +2.39.3 + diff --git a/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch b/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch new file mode 100644 index 0000000..4fb4844 --- /dev/null +++ b/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch @@ -0,0 +1,94 @@ +From a5b4eec5f456b1ca3fe753e1d76f96cf3f8914ef Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 17 Jan 2024 14:55:53 +0100 +Subject: [PATCH 01/22] hv-balloon: use get_min_alignment() to express 32 GiB + alignment + +RH-Author: David Hildenbrand +RH-MergeRequest: 221: memory-device: reintroduce memory region size check +RH-Jira: RHEL-20341 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Igor Mammedov +RH-Commit: [1/2] cbe092fe549552928270892253b31cd8fe199825 + +https://issues.redhat.com/browse/RHEL-20341 + +Let's implement the get_min_alignment() callback for memory devices, and +copy for the device memory region the alignment of the host memory +region. This mimics what virtio-mem does, and allows for re-introducing +proper alignment checks for the memory region size (where we don't care +about additional device requirements) in memory device core. + +Message-ID: <20240117135554.787344-2-david@redhat.com> +Reviewed-by: Maciej S. 
Szmigiero +Signed-off-by: David Hildenbrand +(cherry picked from commit f77c5f38f49c71bc14cf1019ac92b0b95f572414) +Signed-off-by: David Hildenbrand +--- + hw/hyperv/hv-balloon.c | 37 +++++++++++++++++++++---------------- + 1 file changed, 21 insertions(+), 16 deletions(-) + +diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c +index 66f297c1d7..0829c495b0 100644 +--- a/hw/hyperv/hv-balloon.c ++++ b/hw/hyperv/hv-balloon.c +@@ -1476,22 +1476,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon) + balloon->mr = g_new0(MemoryRegion, 1); + memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON, + memory_region_size(hostmem_mr)); +- +- /* +- * The VM can indicate an alignment up to 32 GiB. Memory device core can +- * usually only handle/guarantee 1 GiB alignment. The user will have to +- * specify a larger maxmem eventually. +- * +- * The memory device core will warn the user in case maxmem might have to be +- * increased and will fail plugging the device if there is not sufficient +- * space after alignment. +- * +- * TODO: we could do the alignment ourselves in a slightly bigger region. +- * But this feels better, although the warning might be annoying. Maybe +- * we can optimize that in the future (e.g., with such a device on the +- * cmdline place/size the device memory region differently. +- */ +- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr)); ++ balloon->mr->align = memory_region_get_alignment(hostmem_mr); + } + + static void hv_balloon_free_mr(HvBalloon *balloon) +@@ -1653,6 +1638,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md, + return balloon->mr; + } + ++static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md) ++{ ++ /* ++ * The VM can indicate an alignment up to 32 GiB. Memory device core can ++ * usually only handle/guarantee 1 GiB alignment. The user will have to ++ * specify a larger maxmem eventually. 
++ * ++ * The memory device core will warn the user in case maxmem might have to be ++ * increased and will fail plugging the device if there is not sufficient ++ * space after alignment. ++ * ++ * TODO: we could do the alignment ourselves in a slightly bigger region. ++ * But this feels better, although the warning might be annoying. Maybe ++ * we can optimize that in the future (e.g., with such a device on the ++ * cmdline place/size the device memory region differently. ++ */ ++ return 32 * GiB; ++} ++ + static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) + { +@@ -1765,5 +1769,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data) + mdc->get_memory_region = hv_balloon_md_get_memory_region; + mdc->decide_memslots = hv_balloon_decide_memslots; + mdc->get_memslots = hv_balloon_get_memslots; ++ mdc->get_min_alignment = hv_balloon_md_get_min_alignment; + mdc->fill_device_info = hv_balloon_md_fill_device_info; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch b/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch deleted file mode 100644 index 67e702c..0000000 --- a/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch +++ /dev/null @@ -1,40 +0,0 @@ -From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Tue, 2 May 2023 15:51:53 +0530 -Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines - version 7.6 and above - -RH-Author: Ani Sinha -RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 -RH-Bugzilla: 1934134 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm) - -Please look at QEMU upstream commit -1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3") 
-This patch adapts the above change so that it applies to RHEL pc machines of -version 7.6 and newer. These are the machine types that are currently supported -in RHEL. Q35 machines are not affected. - -Signed-off-by: Ani Sinha ---- - hw/i386/pc_piix.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 4d5880e249..6c7be628e1 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m) - m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; - pcmc->default_nic_model = "e1000"; - pcmc->pci_root_uid = 0; -+ pcmc->resizable_acpi_blob = true; - m->default_display = "std"; - m->no_parallel = 1; - m->numa_mem_supported = true; --- -2.39.1 - diff --git a/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch b/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch deleted file mode 100644 index e06113a..0000000 --- a/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Wed, 29 Mar 2023 10:27:26 +0530 -Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines - older than version 2.3 - -RH-Author: Ani Sinha -RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 -RH-Bugzilla: 1934134 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm) - -i440fx machine versions 2.3 and newer supports dynamic ram -resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") . 
-Currently supported all q35 machine types (versions 2.4 and newer) supports -resizable RAM/ROM blocks.Therefore the warning generated when the ACPI table -size exceeds a pre-defined value does not apply to those machine versions. -Add a check limiting the warning message to only those machines that does not -support expandable ram blocks (that is, i440fx machines with version 2.2 -and older). - -Signed-off-by: Ani Sinha -Message-Id: <20230329045726.14028-1-anisinha@redhat.com> -Reviewed-by: Igor Mammedov -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074) ---- - hw/i386/acpi-build.c | 6 ++++-- - hw/i386/pc.c | 1 + - hw/i386/pc_piix.c | 1 + - include/hw/i386/pc.h | 3 +++ - 4 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index ec857a117e..9bc4d8a981 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) - int legacy_table_size = - ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, - ACPI_BUILD_ALIGN_SIZE); -- if (tables_blob->len > legacy_table_size) { -+ if ((tables_blob->len > legacy_table_size) && -+ !pcmc->resizable_acpi_blob) { - /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ - warn_report("ACPI table size %u exceeds %d bytes," - " migration may not work", -@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) - g_array_set_size(tables_blob, legacy_table_size); - } else { - /* Make sure we have a buffer in case we need to resize the tables. */ -- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { -+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) && -+ !pcmc->resizable_acpi_blob) { - /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. 
*/ - warn_report("ACPI table size %u exceeds %d bytes," - " migration may not work", -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index f216922cee..7db5a2348f 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->acpi_data_size = 0x20000 + 0x8000; - pcmc->pvh_enabled = true; - pcmc->kvmclock_create_always = true; -+ pcmc->resizable_acpi_blob = true; - assert(!mc->get_hotplug_handler); - mc->async_pf_vmexit_disable = false; - mc->get_hotplug_handler = pc_get_hotplug_handler; -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index fc704d783f..4d5880e249 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m) - compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len); - compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len); - pcmc->rsdp_in_ram = false; -+ pcmc->resizable_acpi_blob = false; - } - - DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index d218ad1628..2f514d13d8 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -130,6 +130,9 @@ struct PCMachineClass { - - /* create kvmclock device even when KVM PV features are not exposed */ - bool kvmclock_create_always; -+ -+ /* resizable acpi blob compat */ -+ bool resizable_acpi_blob; - }; - - #define TYPE_PC_MACHINE "generic-pc-machine" --- -2.39.1 - diff --git a/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch b/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch new file mode 100644 index 0000000..84f6108 --- /dev/null +++ b/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch @@ -0,0 +1,42 @@ +From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 21 Nov 2023 16:44:18 +0800 +Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt 
machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm) + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e) +Signed-off-by: Eric Auger +--- + hw/arm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 3ada335a24..660f49db49 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -8,6 +8,7 @@ config ARM_VIRT + imply TPM_TIS_SYSBUS + imply TPM_TIS_I2C + imply NVDIMM ++ imply IOMMUFD + select ARM_GIC + select ACPI + select ARM_SMMUV3 +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch b/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch deleted file mode 100644 index e96bb10..0000000 --- a/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary - -RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 - -There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'. -Both of them are required to follow cluster-NUMA-node boundary. 
To -enable the validation to warn about the irregular configuration where -multiple CPUs in one cluster have been associated with different NUMA -nodes. - -Signed-off-by: Gavin Shan -Acked-by: Igor Mammedov -Message-Id: <20230509002739.18388-3-gshan@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb) -Signed-off-by: Gavin Shan ---- - hw/arm/sbsa-ref.c | 2 ++ - hw/arm/virt.c | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c -index 0b93558dde..efb380e7c8 100644 ---- a/hw/arm/sbsa-ref.c -+++ b/hw/arm/sbsa-ref.c -@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) - mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; - mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; - mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - } - - static const TypeInfo sbsa_ref_info = { -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 9be53e9355..df6a0231bc 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - mc->smp_props.clusters_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->auto_enable_numa_with_memdev = true; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - mc->default_ram_id = "mach-virt.ram"; - - object_class_property_add(oc, "acpi", "OnOffAuto", --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch b/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch deleted file mode 100644 index 3bbe93f..0000000 --- a/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch +++ /dev/null @@ -1,166 +0,0 @@ -From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Tue, 25 Jul 2023 10:56:51 
+0100 -Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas Huth -RH-Acked-by: Peter Xu -RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -The implementation of the SMMUv3 has multiple places where it reads a -data structure from the guest and directly operates on it without -doing a guest-to-host endianness conversion. Since all SMMU data -structures are little-endian, this means that the SMMU doesn't work -on a big-endian host. In particular, this causes the Avocado test - machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max -to fail on an s390x host. - -Add appropriate byte-swapping on reads and writes of guest in-memory -data structures so that the device works correctly on big-endian -hosts. - -As part of this we constrain queue_read() to operate only on Cmd -structs and queue_write() on Evt structs, because in practice these -are the only data structures the two functions are used with, and we -need to know what the data structure is to be able to byte-swap its -parts correctly. 
- -Signed-off-by: Peter Maydell -Tested-by: Thomas Huth -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Auger -Message-id: 20230717132641.764660-1-peter.maydell@linaro.org -Cc: qemu-stable@nongnu.org -(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3) -Signed-off-by: Eric Auger ---- - hw/arm/smmu-common.c | 3 +-- - hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++-------- - 2 files changed, 32 insertions(+), 10 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index e7f1c1f219..daa02ce798 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte, - dma_addr_t addr = baseaddr + index * sizeof(*pte); - - /* TODO: guarantee 64-bit single-copy atomicity */ -- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte), -- MEMTXATTRS_UNSPECIFIED); -+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED); - - if (ret != MEMTX_OK) { - info->type = SMMU_PTW_ERR_WALK_EABT; -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 270c80b665..cfb56725a6 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn) - trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn); - } - --static inline MemTxResult queue_read(SMMUQueue *q, void *data) -+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd) - { - dma_addr_t addr = Q_CONS_ENTRY(q); -+ MemTxResult ret; -+ int i; - -- return dma_memory_read(&address_space_memory, addr, data, q->entry_size, -- MEMTXATTRS_UNSPECIFIED); -+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd), -+ MEMTXATTRS_UNSPECIFIED); -+ if (ret != MEMTX_OK) { -+ return ret; -+ } -+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) { -+ le32_to_cpus(&cmd->word[i]); -+ } -+ return ret; - } - --static MemTxResult queue_write(SMMUQueue *q, void *data) -+static MemTxResult 
queue_write(SMMUQueue *q, Evt *evt_in) - { - dma_addr_t addr = Q_PROD_ENTRY(q); - MemTxResult ret; -+ Evt evt = *evt_in; -+ int i; - -- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size, -+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) { -+ cpu_to_le32s(&evt.word[i]); -+ } -+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt), - MEMTXATTRS_UNSPECIFIED); - if (ret != MEMTX_OK) { - return ret; -@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s) - static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, - SMMUEventInfo *event) - { -- int ret; -+ int ret, i; - - trace_smmuv3_get_ste(addr); - /* TODO: guarantee 64-bit single-copy atomicity */ -@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, - event->u.f_ste_fetch.addr = addr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { -+ le32_to_cpus(&buf->word[i]); -+ } - return 0; - - } -@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, - CD *buf, SMMUEventInfo *event) - { - dma_addr_t addr = STE_CTXPTR(ste); -- int ret; -+ int ret, i; - - trace_smmuv3_get_cd(addr); - /* TODO: guarantee 64-bit single-copy atomicity */ -@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, - event->u.f_ste_fetch.addr = addr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { -+ le32_to_cpus(&buf->word[i]); -+ } - return 0; - } - -@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, - return -EINVAL; - } - if (s->features & SMMU_FEATURE_2LVL_STE) { -- int l1_ste_offset, l2_ste_offset, max_l2_ste, span; -+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i; - dma_addr_t l1ptr, l2ptr; - STEDesc l1std; - -@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, - event->u.f_ste_fetch.addr = l1ptr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) { -+ 
le32_to_cpus(&l1std.word[i]); -+ } - - span = L1STD_SPAN(&l1std); - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch new file mode 100644 index 0000000..76ab341 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch @@ -0,0 +1,88 @@ +From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 9 Jan 2024 11:36:42 +1000 +Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory + regions + +RH-Author: Gavin Shan +RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions +RH-Jira: RHEL-19738 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Auger +RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4 + +Upstream: RHEL-only +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352 + +There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI +ECAM and PCI MMIO. Each of them has a property introduced by upstream +commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory +regions") so that the corresponding high memory region can be disabled. + +It's notable that another property ("compact-highmem") was introduced by +upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property") +so that the compact high memory region layout during assignment can be +disabled, compatible with the old machine types. However, we don't have +the compatibility issue since the compact high memory region layout is +always kept as disabled until RHEL9.2.0 machine type and onwards. + +Expose those 3 properties: "highmem-redists", "highmem-ecam" and +"highmem-mmio". The property "compact-highmem" is kept as hidden.
+ +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 5cab00b4cd..60f117f0d2 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) + + vms->highmem_compact = value; + } ++#endif /* disabled for RHEL */ + + static bool virt_get_highmem_redists(Object *obj, Error **errp) + { +@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) + + vms->highmem_mmio = value; + } +-#endif /* disabled for RHEL */ + + static bool virt_get_its(Object *obj, Error **errp) + { +@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable using " + "physical address space above 32 bits"); + ++ object_class_property_add_bool(oc, "highmem-redists", ++ virt_get_highmem_redists, ++ virt_set_highmem_redists); ++ object_class_property_set_description(oc, "highmem-redists", ++ "Set on/off to enable/disable high " ++ "memory region for GICv3 or GICv4 " ++ "redistributor"); ++ ++ object_class_property_add_bool(oc, "highmem-ecam", ++ virt_get_highmem_ecam, ++ virt_set_highmem_ecam); ++ object_class_property_set_description(oc, "highmem-ecam", ++ "Set on/off to enable/disable high " ++ "memory region for PCI ECAM"); ++ ++ object_class_property_add_bool(oc, "highmem-mmio", ++ virt_get_highmem_mmio, ++ virt_set_highmem_mmio); ++ object_class_property_set_description(oc, "highmem-mmio", ++ "Set on/off to enable/disable high " ++ "memory region for PCI MMIO"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Fix-compats.patch b/SOURCES/kvm-hw-arm-virt-Fix-compats.patch new file mode 100644 index 0000000..7e3af18 --- /dev/null +++ 
b/SOURCES/kvm-hw-arm-virt-Fix-compats.patch @@ -0,0 +1,132 @@ +From 3f58194f8642a71c47d91d3c00a34faf44ea2c11 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 3 Jan 2024 05:57:38 -0500 +Subject: [PATCH] hw/arm/virt: Fix compats + +RH-Author: Eric Auger +RH-MergeRequest: 209: hw/arm/virt: Fix compats +RH-Jira: RHEL-17168 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Commit: [1/1] bcdf6493bbd6d7b52b0b88ff44441d22aeddfde2 (eauger1/centos-qemu-kvm) + +arm_rhel_compat is not added for virt-rhel9.4.0 machine causing +the efi-virtio.rom to be looked for when instantiating a virtio-net-pci +device and it won't be found since not shipped on ARM. This is a +regression compared to 9.2. + +Actually we do not need any rom file for any virtio-net-pci variant +because edk2 already brings the functionality. So for 9.4 onwards, we +want to set romfiles to "" for all of them. + +However at the moment we apply arm_rhel_compat from the latest +rhel*_virt_options(). This is not aligned with the generic compat +usage which sets compats for a given machine type to accommodate for +changes that occurred after its advent. Here we are somehow abusing +the compat infra to set general driver options that should apply for +all machines. On top of that this is really error prone and we have +forgotten to add arm_rhel_compat several times in the past. + +So let's introduce arm_rhel_compat_set() being called before any +*virt_options in the non abstract machine class. That way the setting +will apply to any machine type without any need to add it in any +future machine types. + +For < 9.4 machines we don't really care keeping non void romfiles +for transitional and non transitional devices because anyway this was +not working. So let's keep things simple and apply the new defaults for +all RHEL9 machine types. + +Finally, to follow the generic pattern we should set hw_compat_rhel_9_0 +in 9.0 machine as it is done on x86 or ccw.
This has no consequence on +aarch64 because it only contains x86 stuff but that helps understanding +the consistency. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 43 +++++++++++++++++++++++++++++-------------- + 1 file changed, 29 insertions(+), 14 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 0b17c94ad7..5cab00b4cd 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -111,11 +111,39 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) + #endif /* disabled for RHEL */ + ++/* ++ * This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. They may be overriden by older machine compats. ++ * ++ * virtio-net-pci variant romfiles are not needed because edk2 does ++ * fully support the pxe boot. Besides virtio romfiles are not shipped ++ * on rhel/aarch64. ++ */ ++GlobalProperty arm_rhel_compat[] = { ++ {"virtio-net-pci", "romfile", "" }, ++ {"virtio-net-pci-transitional", "romfile", "" }, ++ {"virtio-net-pci-non-transitional", "romfile", "" }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); ++ ++/* ++ * This cannot be called from the rhel_virt_class_init() because ++ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new() ++ * only is called on virt-rhelm.n.s non abstract class init. ++ */ ++static void arm_rhel_compat_set(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, ++ arm_rhel_compat_len); ++} ++ + #define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ + static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ ++ arm_rhel_compat_set(mc); \ + rhel##m##n##s##_virt_options(mc); \ + mc->desc = "RHEL " # m "." # n "." 
# s " ARM Virtual Machine"; \ + if (latest) { \ +@@ -139,19 +167,6 @@ + #define DEFINE_RHEL_MACHINE(major, minor, subminor) \ + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) + +-/* This variable is for changes to properties that are RHEL specific, +- * different to the current upstream and to be applied to the latest +- * machine type. +- */ +-GlobalProperty arm_rhel_compat[] = { +- { +- .driver = "virtio-net-pci", +- .property = "romfile", +- .value = "", +- }, +-}; +-const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); +- + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 + +@@ -3639,7 +3654,6 @@ static void rhel920_virt_options(MachineClass *mc) + { + rhel940_virt_options(mc); + +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); +@@ -3653,6 +3667,7 @@ static void rhel900_virt_options(MachineClass *mc) + rhel920_virt_options(mc); + + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch b/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch deleted file mode 100644 index 42ec705..0000000 --- a/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for - 
RHEL machines - -RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 -Upstream Status: RHEL only - -Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of -CPU cluster and NUMA node will be validated for 'virt-rhel*' machines. -A warning message will be printed if the boundary is broken. - -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index df6a0231bc..faf68488d5 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - mc->smp_props.clusters_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->auto_enable_numa_with_memdev = true; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - mc->default_ram_id = "mach-virt.ram"; - - object_class_property_add(oc, "acpi", "OnOffAuto", --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch b/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch new file mode 100644 index 0000000..4770a58 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch @@ -0,0 +1,41 @@ +From 4c1d07995a7afb6fae68a7e7a8b6b6c94fa0a7bb Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Mon, 12 Feb 2024 10:37:54 +0100 +Subject: [PATCH 5/6] hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types + +RH-Author: Cornelia Huck +RH-MergeRequest: 225: hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types +RH-Jira: RHEL-24988 +RH-Acked-by: Sebastian Ott +RH-Acked-by: Eric Auger +RH-Commit: [1/1] f15579db44808fa8a2d7bc01b3915aa59c064411 (cohuck/qemu-kvm-c9s) + +Jira: 
https://issues.redhat.com/browse/RHEL-24988 +Upstream: RHEL only + +We do not plan to support any machine types prior to 9.4.0; leave them +in, but mark as deprecated. + +Signed-off-by: Cornelia Huck +--- + hw/arm/virt.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 60f117f0d2..943c563391 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3679,6 +3679,10 @@ static void rhel920_virt_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); ++ ++ /* RHEL 9.4 is the first supported release */ ++ mc->deprecation_reason = ++ "machine types for versions prior to 9.4 are deprecated"; + } + DEFINE_RHEL_MACHINE(9, 2, 0) + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch b/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch new file mode 100644 index 0000000..81c20e5 --- /dev/null +++ b/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch @@ -0,0 +1,41 @@ +From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 21 Nov 2023 16:44:20 +0800 +Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm) + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8) +Signed-off-by: Eric Auger +--- + 
hw/i386/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig +index 55850791df..a1846be6f7 100644 +--- a/hw/i386/Kconfig ++++ b/hw/i386/Kconfig +@@ -95,6 +95,7 @@ config Q35 + imply E1000E_PCI_EXPRESS + imply VMPORT + imply VMMOUSE ++ imply IOMMUFD + select PC_PCI + select PC_ACPI + select PCI_EXPRESS_Q35 +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch b/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch new file mode 100644 index 0000000..5470bdf --- /dev/null +++ b/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch @@ -0,0 +1,186 @@ +From ea2e2368dcf4140be47288472f2c2a094358e0c7 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Thu, 8 Feb 2024 23:03:45 +0100 +Subject: [PATCH 03/20] hw/i386/pc: Defer smbios_set_defaults() to machine_done +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [1/18] 9d4c1d1a910fec7d310429d6fc0b10c798932db7 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +commit: a0204a5ed091dfe79aced7ec8f3ce1931fd25816 +Author: Bernhard Beschow + + Handling most of smbios data generation in the machine_done notifier is similar + to how the ARM virt machine handles it which also calls smbios_set_defaults() + there. The result is that all pc machines are freed from explicitly worrying + about smbios setup. 
+ + Signed-off-by: Bernhard Beschow + Reviewed-by: Philippe Mathieu-Daudé + Message-ID: <20240208220349.4948-6-shentey@gmail.com> + Signed-off-by: Philippe Mathieu-Daudé + +Conflicts: hw/i386/pc_q35.c, hw/i386/pc_piix.c + due to missing 4d3457fef9 (w/i386/pc: Merge pc_guest_info_init() into pc_machine_initfn()) + and different signature of smbios_set_defaults() downstream +Fixup: hw/i386/fw_cfg.c to account for downstream changes smbios_set_defaults() + +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 14 +++++++++++++- + hw/i386/fw_cfg.h | 3 ++- + hw/i386/pc.c | 2 +- + hw/i386/pc_piix.c | 12 ------------ + hw/i386/pc_q35.c | 11 ----------- + include/hw/i386/pc.h | 1 - + 6 files changed, 16 insertions(+), 27 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 7362daa45a..6a5466faf0 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -48,15 +48,27 @@ const char *fw_cfg_arch_key_name(uint16_t key) + return NULL; + } + +-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg) ++void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + { + #ifdef CONFIG_SMBIOS + uint8_t *smbios_tables, *smbios_anchor; + size_t smbios_tables_len, smbios_anchor_len; + struct smbios_phys_mem_area *mem_array; + unsigned i, array_count; ++ MachineState *ms = MACHINE(pcms); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ MachineClass *mc = MACHINE_GET_CLASS(pcms); + X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); + ++ if (pcmc->smbios_defaults) { ++ /* These values are guest ABI, do not change */ ++ smbios_set_defaults("QEMU", mc->desc, mc->name, ++ pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, ++ pcms->smbios_entry_point_type); ++ } ++ + /* tell smbios about cpuid version and features */ + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + +diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h +index 86ca7c1c0c..1e1de6b4a3 100644 +--- 
a/hw/i386/fw_cfg.h ++++ b/hw/i386/fw_cfg.h +@@ -10,6 +10,7 @@ + #define HW_I386_FW_CFG_H + + #include "hw/boards.h" ++#include "hw/i386/pc.h" + #include "hw/nvram/fw_cfg.h" + + #define FW_CFG_IO_BASE 0x510 +@@ -22,7 +23,7 @@ + FWCfgState *fw_cfg_arch_create(MachineState *ms, + uint16_t boot_cpus, + uint16_t apic_id_limit); +-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg); ++void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg); + void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); + void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index a1faa9e92c..16de2a59e8 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -847,7 +847,7 @@ void pc_machine_done(Notifier *notifier, void *data) + + acpi_setup(); + if (x86ms->fw_cfg) { +- fw_cfg_build_smbios(MACHINE(pcms), x86ms->fw_cfg); ++ fw_cfg_build_smbios(pcms, x86ms->fw_cfg); + fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); + /* update FW_CFG_NB_CPUS to account for -device added CPUs */ + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 09d02cc91f..7344b35cf1 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -36,7 +36,6 @@ + #include "hw/rtc/mc146818rtc.h" + #include "hw/southbridge/piix.h" + #include "hw/display/ramfb.h" +-#include "hw/firmware/smbios.h" + #include "hw/pci/pci.h" + #include "hw/pci/pci_ids.h" + #include "hw/usb.h" +@@ -233,17 +232,6 @@ static void pc_init1(MachineState *machine, + + pc_guest_info_init(pcms); + +- if (pcmc->smbios_defaults) { +- MachineClass *mc = MACHINE_GET_CLASS(machine); +- /* These values are guest ABI, do not change */ +- smbios_set_defaults("Red Hat", "KVM", +- mc->desc, pcmc->smbios_legacy_mode, +- pcmc->smbios_uuid_encoded, +- pcmc->smbios_stream_product, +- pcmc->smbios_stream_version, +- pcms->smbios_entry_point_type); +- } +- + /* allocate ram and load rom/bios */ + if 
(!xen_enabled()) { + pc_memory_init(pcms, system_memory, rom_memory, hole64_size); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index c6967e1846..9a22ff5dd6 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -45,7 +45,6 @@ + #include "hw/i386/amd_iommu.h" + #include "hw/i386/intel_iommu.h" + #include "hw/display/ramfb.h" +-#include "hw/firmware/smbios.h" + #include "hw/ide/pci.h" + #include "hw/ide/ahci.h" + #include "hw/intc/ioapic.h" +@@ -201,16 +200,6 @@ static void pc_q35_init(MachineState *machine) + + pc_guest_info_init(pcms); + +- if (pcmc->smbios_defaults) { +- /* These values are guest ABI, do not change */ +- smbios_set_defaults("Red Hat", "KVM", +- mc->desc, pcmc->smbios_legacy_mode, +- pcmc->smbios_uuid_encoded, +- pcmc->smbios_stream_product, +- pcmc->smbios_stream_version, +- pcms->smbios_entry_point_type); +- } +- + /* create pci host bus */ + phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE)); + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 37644ede7e..c286c10bc3 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -12,7 +12,6 @@ + #include "hw/hotplug.h" + #include "qom/object.h" + #include "hw/i386/sgx-epc.h" +-#include "hw/firmware/smbios.h" + #include "hw/cxl/cxl.h" + + #define HPET_INTCAP "hpet-intcap" +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch deleted file mode 100644 index fe9cd8c..0000000 --- a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 25 Jul 2023 15:34:45 -0300 -Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type - <= pc-q35-rhel9.2.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 192: hw/pci: 
Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0 -RH-Bugzilla: 2223691 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm) - -This is a downstream-only patch to that sets off the property -x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing -live migrations to RHEL9.2 happen successfully. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691 -Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine -type < 8.0") -Signed-off-by: Leonardo Bras ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5ea52317b9..6f5117669d 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = { - { "virtio-mem", "x-early-migration", "false" }, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ - { "migration", "x-preempt-pre-7-2", "true" }, -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, - }; - const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch deleted file mode 100644 index 164bea7..0000000 --- a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 2 May 2023 21:27:02 -0300 -Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine - type < 8.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 -RH-Bugzilla: 2189423 -RH-Acked-by: Peter Xu 
-RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm) - -Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK -set for machine types < 8.0 will cause migration to fail if the target -QEMU version is < 8.0.0 : - -qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0 -qemu-system-x86_64: Failed to load PCIDevice:config -qemu-system-x86_64: Failed to load e1000e:parent_obj -qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e' -qemu-system-x86_64: load of migration failed: Invalid argument - -The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0, -with this cmdline: - -./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX] - -In order to fix this, property x-pcie-err-unc-mask was introduced to -control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by -default, but is disabled if machine type <= 7.2. - -Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register") -Suggested-by: Michael S. Tsirkin -Signed-off-by: Leonardo Bras -Message-Id: <20230503002701.854329-1-leobras@redhat.com> -Reviewed-by: Jonathan Cameron -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576 -Tested-by: Fiona Ebner -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f) -Signed-off-by: Leonardo Bras ---- - hw/core/machine.c | 1 + - hw/pci/pci.c | 2 ++ - hw/pci/pcie_aer.c | 11 +++++++---- - include/hw/pci/pci.h | 2 ++ - 4 files changed, 12 insertions(+), 4 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 0e0120b7f2..c28702b690 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = { - { "e1000e", "migrate-timadj", "off" }, - { "virtio-mem", "x-early-migration", "false" }, - { "migration", "x-preempt-pre-7-2", "true" }, -+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, - }; - const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); - -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index def5000e7b..8ad4349e96 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -79,6 +79,8 @@ static Property pci_props[] = { - DEFINE_PROP_STRING("failover_pair_id", PCIDevice, - failover_pair_id), - DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), -+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, -+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true), - DEFINE_PROP_END_OF_LIST() - }; - -diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c -index 103667c368..374d593ead 100644 ---- a/hw/pci/pcie_aer.c -+++ b/hw/pci/pcie_aer.c -@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, - - pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, - PCI_ERR_UNC_SUPPORTED); -- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, -- PCI_ERR_UNC_MASK_DEFAULT); -- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, -- PCI_ERR_UNC_SUPPORTED); -+ -+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { -+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, -+ PCI_ERR_UNC_MASK_DEFAULT); -+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, -+ PCI_ERR_UNC_SUPPORTED); -+ } - - pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, - 
PCI_ERR_UNC_SEVERITY_DEFAULT); -diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h -index d5a40cd058..6dc6742fc4 100644 ---- a/include/hw/pci/pci.h -+++ b/include/hw/pci/pci.h -@@ -207,6 +207,8 @@ enum { - QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), - #define QEMU_PCIE_CXL_BITNR 10 - QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), -+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 -+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), - }; - - typedef struct PCIINTxRoute { --- -2.39.3 - diff --git a/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch b/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch new file mode 100644 index 0000000..f850765 --- /dev/null +++ b/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch @@ -0,0 +1,116 @@ +From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 21 Nov 2023 15:03:55 +0100 +Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm) + +When the legacy and iommufd backends were introduced, a set of common +vfio-pci routines were exported in pci.c for both backends to use : + + vfio_pci_pre_reset + vfio_pci_get_pci_hot_reset_info + vfio_pci_host_match + vfio_pci_post_reset + +This introduced a build failure on PPC when --without-default-devices +is used because VFIO is always selected in ppc/Kconfig but VFIO_PCI is +not. + +Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the +VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with +CONFIG_VFIO_PCI.
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42) +Signed-off-by: Eric Auger +--- + hw/ppc/Kconfig | 2 +- + hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 37 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig +index 56f0475a8e..44263a58c4 100644 +--- a/hw/ppc/Kconfig ++++ b/hw/ppc/Kconfig +@@ -3,11 +3,11 @@ config PSERIES + imply PCI_DEVICES + imply TEST_DEVICES + imply VIRTIO_VGA ++ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c + select NVDIMM + select DIMM + select PCI + select SPAPR_VSCSI +- select VFIO if LINUX # needed by spapr_pci_vfio.c + select XICS + select XIVE + select MSI_NONBROKEN +diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c +index d1d07bec46..76b2a3487b 100644 +--- a/hw/ppc/spapr_pci_vfio.c ++++ b/hw/ppc/spapr_pci_vfio.c +@@ -26,10 +26,12 @@ + #include "hw/pci/pci_device.h" + #include "hw/vfio/vfio-common.h" + #include "qemu/error-report.h" ++#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */ + + /* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ ++#ifdef CONFIG_VFIO_PCI + static bool vfio_eeh_container_ok(VFIOContainer *container) + { + /* +@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) + + return RTAS_OUT_SUCCESS; + } ++ ++#else ++ ++bool spapr_phb_eeh_available(SpaprPhbState *sphb) ++{ ++ return false; ++} ++ ++void spapr_phb_vfio_reset(DeviceState *qdev) ++{ ++} ++ ++int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, ++ unsigned int addr, int option) ++{ ++ return RTAS_OUT_NOT_SUPPORTED; ++} ++ ++int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state) ++{ ++ return RTAS_OUT_NOT_SUPPORTED; ++} ++ ++int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) ++{ ++ return RTAS_OUT_NOT_SUPPORTED; ++} ++ ++int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) ++{ ++ return 
RTAS_OUT_NOT_SUPPORTED; ++} ++ ++#endif /* CONFIG_VFIO_PCI */ +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch b/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch deleted file mode 100644 index 08ee94f..0000000 --- a/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch +++ /dev/null @@ -1,470 +0,0 @@ -From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with - qemu_bh_new_guarded - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit f63192b0544af5d3e4d5edfd85ab520fcf671377 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:09 2023 -0400 - - hw: replace most qemu_bh_new calls with qemu_bh_new_guarded - - This protects devices from bh->mmio reentrancy issues. - - Thanks: Thomas Huth for diagnosing OS X test failure. - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Michael S. 
Tsirkin - Reviewed-by: Paul Durrant - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-5-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/9pfs/xen-9p-backend.c | 5 ++++- - hw/block/dataplane/virtio-blk.c | 3 ++- - hw/block/dataplane/xen-block.c | 5 +++-- - hw/char/virtio-serial-bus.c | 3 ++- - hw/display/qxl.c | 9 ++++++--- - hw/display/virtio-gpu.c | 6 ++++-- - hw/ide/ahci.c | 3 ++- - hw/ide/ahci_internal.h | 1 + - hw/ide/core.c | 4 +++- - hw/misc/imx_rngc.c | 6 ++++-- - hw/misc/macio/mac_dbdma.c | 2 +- - hw/net/virtio-net.c | 3 ++- - hw/nvme/ctrl.c | 6 ++++-- - hw/scsi/mptsas.c | 3 ++- - hw/scsi/scsi-bus.c | 3 ++- - hw/scsi/vmw_pvscsi.c | 3 ++- - hw/usb/dev-uas.c | 3 ++- - hw/usb/hcd-dwc2.c | 3 ++- - hw/usb/hcd-ehci.c | 3 ++- - hw/usb/hcd-uhci.c | 2 +- - hw/usb/host-libusb.c | 6 ++++-- - hw/usb/redirect.c | 6 ++++-- - hw/usb/xen-usb.c | 3 ++- - hw/virtio/virtio-balloon.c | 5 +++-- - hw/virtio/virtio-crypto.c | 3 ++- - 25 files changed, 66 insertions(+), 33 deletions(-) - -diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c -index 74f3a05f88..0e266c552b 100644 ---- a/hw/9pfs/xen-9p-backend.c -+++ b/hw/9pfs/xen-9p-backend.c -@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { - - int num_rings; - Xen9pfsRing *rings; -+ MemReentrancyGuard mem_reentrancy_guard; - } Xen9pfsDev; - - static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); -@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) - xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + - XEN_FLEX_RING_SIZE(ring_order); - -- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); -+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, -+ &xen_9pdev->rings[i], -+ &xen_9pdev->mem_reentrancy_guard); - xen_9pdev->rings[i].out_cons = 0; - xen_9pdev->rings[i].out_size = 0; - xen_9pdev->rings[i].inprogress = false; -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 
b28d81737e..a6202997ee 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - } else { - s->ctx = qemu_get_aio_context(); - } -- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); -+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, -+ &DEVICE(vdev)->mem_reentrancy_guard); - s->batch_notify_vqs = bitmap_new(conf->num_queues); - - *dataplane = s; -diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c -index 734da42ea7..d8bc39d359 100644 ---- a/hw/block/dataplane/xen-block.c -+++ b/hw/block/dataplane/xen-block.c -@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, - } else { - dataplane->ctx = qemu_get_aio_context(); - } -- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, -- dataplane); -+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, -+ dataplane, -+ &DEVICE(xendev)->mem_reentrancy_guard); - - return dataplane; - } -diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c -index 7d4601cb5d..dd619f0731 100644 ---- a/hw/char/virtio-serial-bus.c -+++ b/hw/char/virtio-serial-bus.c -@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) - return; - } - -- port->bh = qemu_bh_new(flush_queued_data_bh, port); -+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, -+ &dev->mem_reentrancy_guard); - port->elem = NULL; - } - -diff --git a/hw/display/qxl.c b/hw/display/qxl.c -index 80ce1e9a93..f1c0eb7dfc 100644 ---- a/hw/display/qxl.c -+++ b/hw/display/qxl.c -@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) - - qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); - -- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl); -+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, -+ &DEVICE(qxl)->mem_reentrancy_guard); - 
qxl_reset_state(qxl); - -- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); -- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); -+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, -+ &DEVICE(qxl)->mem_reentrancy_guard); -+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, -+ &DEVICE(qxl)->mem_reentrancy_guard); - } - - static void qxl_realize_primary(PCIDevice *dev, Error **errp) -diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c -index 5e15c79b94..66ac9b6cc5 100644 ---- a/hw/display/virtio-gpu.c -+++ b/hw/display/virtio-gpu.c -@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) - - g->ctrl_vq = virtio_get_queue(vdev, 0); - g->cursor_vq = virtio_get_queue(vdev, 1); -- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); -- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); -+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, -+ &qdev->mem_reentrancy_guard); -+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, -+ &qdev->mem_reentrancy_guard); - QTAILQ_INIT(&g->reslist); - QTAILQ_INIT(&g->cmdq); - QTAILQ_INIT(&g->fenceq); -diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c -index 55902e1df7..4e76d6b191 100644 ---- a/hw/ide/ahci.c -+++ b/hw/ide/ahci.c -@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) - ahci_write_fis_d2h(ad); - - if (ad->port_regs.cmd_issue && !ad->check_bh) { -- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); -+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, -+ &ad->mem_reentrancy_guard); - qemu_bh_schedule(ad->check_bh); - } - } -diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h -index 303fcd7235..2480455372 100644 ---- a/hw/ide/ahci_internal.h -+++ b/hw/ide/ahci_internal.h -@@ -321,6 +321,7 @@ struct AHCIDevice { - bool init_d2h_sent; - AHCICmdHdr *cur_cmd; - NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; -+ MemReentrancyGuard mem_reentrancy_guard; - }; - - 
struct AHCIPCIState { -diff --git a/hw/ide/core.c b/hw/ide/core.c -index 45d14a25e9..de48ff9f86 100644 ---- a/hw/ide/core.c -+++ b/hw/ide/core.c -@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( - BlockCompletionFunc *cb, void *cb_opaque, void *opaque) - { - IDEState *s = opaque; -+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; - TrimAIOCB *iocb; - - /* Paired with a decrement in ide_trim_bh_cb() */ -@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( - - iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); - iocb->s = s; -- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); -+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, -+ &DEVICE(dev)->mem_reentrancy_guard); - iocb->ret = 0; - iocb->qiov = qiov; - iocb->i = -1; -diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c -index 632c03779c..082c6980ad 100644 ---- a/hw/misc/imx_rngc.c -+++ b/hw/misc/imx_rngc.c -@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) - sysbus_init_mmio(sbd, &s->iomem); - - sysbus_init_irq(sbd, &s->irq); -- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); -- s->seed_bh = qemu_bh_new(imx_rngc_seed, s); -+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, -+ &dev->mem_reentrancy_guard); -+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, -+ &dev->mem_reentrancy_guard); - } - - static void imx_rngc_reset(DeviceState *dev) -diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c -index 43bb1f56ba..80a789f32b 100644 ---- a/hw/misc/macio/mac_dbdma.c -+++ b/hw/misc/macio/mac_dbdma.c -@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) - { - DBDMAState *s = MAC_DBDMA(dev); - -- s->bh = qemu_bh_new(DBDMA_run_bh, s); -+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); - } - - static void mac_dbdma_class_init(ObjectClass *oc, void *data) -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 53e1c32643..447f669921 100644 ---- a/hw/net/virtio-net.c -+++ 
b/hw/net/virtio-net.c -@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) - n->vqs[index].tx_vq = - virtio_add_queue(vdev, n->net_conf.tx_queue_size, - virtio_net_handle_tx_bh); -- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); -+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], -+ &DEVICE(vdev)->mem_reentrancy_guard); - } - - n->vqs[index].tx_waiting = 0; -diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c -index ac24eeb5ed..e5a468975e 100644 ---- a/hw/nvme/ctrl.c -+++ b/hw/nvme/ctrl.c -@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, - QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); - } - -- sq->bh = qemu_bh_new(nvme_process_sq, sq); -+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, -+ &DEVICE(sq->ctrl)->mem_reentrancy_guard); - - if (n->dbbuf_enabled) { - sq->db_addr = n->dbbuf_dbs + (sqid << 3); -@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, - } - } - n->cq[cqid] = cq; -- cq->bh = qemu_bh_new(nvme_post_cqes, cq); -+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, -+ &DEVICE(cq->ctrl)->mem_reentrancy_guard); - } - - static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) -diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c -index c485da792c..3de288b454 100644 ---- a/hw/scsi/mptsas.c -+++ b/hw/scsi/mptsas.c -@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) - } - s->max_devices = MPTSAS_NUM_PORTS; - -- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); -+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, -+ &DEVICE(dev)->mem_reentrancy_guard); - - scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); - } -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index c97176110c..3c20b47ad0 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool 
running, RunState state) - AioContext *ctx = blk_get_aio_context(s->conf.blk); - /* The reference is dropped in scsi_dma_restart_bh.*/ - object_ref(OBJECT(s)); -- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); -+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, -+ &DEVICE(s)->mem_reentrancy_guard); - qemu_bh_schedule(s->bh); - } - } -diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c -index fa76696855..4de34536e9 100644 ---- a/hw/scsi/vmw_pvscsi.c -+++ b/hw/scsi/vmw_pvscsi.c -@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) - pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); - } - -- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); -+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, -+ &DEVICE(pci_dev)->mem_reentrancy_guard); - - scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); - /* override default SCSI bus hotplug-handler, with pvscsi's one */ -diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c -index 88f99c05d5..f013ded91e 100644 ---- a/hw/usb/dev-uas.c -+++ b/hw/usb/dev-uas.c -@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) - - QTAILQ_INIT(&uas->results); - QTAILQ_INIT(&uas->requests); -- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); -+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, -+ &d->mem_reentrancy_guard); - - dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); - scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); -diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c -index 8755e9cbb0..a0c4e782b2 100644 ---- a/hw/usb/hcd-dwc2.c -+++ b/hw/usb/hcd-dwc2.c -@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) - s->fi = USB_FRMINTVL - 1; - s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); -- s->async_bh = qemu_bh_new(dwc2_work_bh, s); -+ 
s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, -+ &dev->mem_reentrancy_guard); - - sysbus_init_irq(sbd, &s->irq); - } -diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c -index d4da8dcb8d..c930c60921 100644 ---- a/hw/usb/hcd-ehci.c -+++ b/hw/usb/hcd-ehci.c -@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) - } - - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); -- s->async_bh = qemu_bh_new(ehci_work_bh, s); -+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, -+ &dev->mem_reentrancy_guard); - s->device = dev; - - s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); -diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 8ac1175ad2..77baaa7a6b 100644 ---- a/hw/usb/hcd-uhci.c -+++ b/hw/usb/hcd-uhci.c -@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) - USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); - } - } -- s->bh = qemu_bh_new(uhci_bh, s); -+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); - s->num_ports_vmstate = NB_PORTS; - QTAILQ_INIT(&s->queues); -diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c -index 176868d345..f500db85ab 100644 ---- a/hw/usb/host-libusb.c -+++ b/hw/usb/host-libusb.c -@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) - static void usb_host_nodev(USBHostDevice *s) - { - if (!s->bh_nodev) { -- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); -+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, -+ &DEVICE(s)->mem_reentrancy_guard); - } - qemu_bh_schedule(s->bh_nodev); - } -@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) - USBHostDevice *dev = opaque; - - if (!dev->bh_postld) { -- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); -+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); - } - 
qemu_bh_schedule(dev->bh_postld); - dev->bh_postld_pending = true; -diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c -index fd7df599bc..39fbaaab16 100644 ---- a/hw/usb/redirect.c -+++ b/hw/usb/redirect.c -@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) - } - } - -- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); -- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); -+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); -+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); - dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); - - packet_id_queue_init(&dev->cancelled, dev, "cancelled"); -diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c -index 66cb3f7c24..38ee660a30 100644 ---- a/hw/usb/xen-usb.c -+++ b/hw/usb/xen-usb.c -@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) - - QTAILQ_INIT(&usbif->req_free_q); - QSIMPLEQ_INIT(&usbif->hotplug_q); -- usbif->bh = qemu_bh_new(usbback_bh, usbif); -+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, -+ &DEVICE(xendev)->mem_reentrancy_guard); - } - - static int usbback_free(struct XenLegacyDevice *xendev) -diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c -index 43092aa634..5186e831dd 100644 ---- a/hw/virtio/virtio-balloon.c -+++ b/hw/virtio/virtio-balloon.c -@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) - precopy_add_notifier(&s->free_page_hint_notify); - - object_ref(OBJECT(s->iothread)); -- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), -- virtio_ballloon_get_free_page_hints, s); -+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), -+ virtio_ballloon_get_free_page_hints, s, -+ &dev->mem_reentrancy_guard); - } - - if (virtio_has_feature(s->host_features, 
VIRTIO_BALLOON_F_REPORTING)) { -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index 802e1b9659..2fe804510f 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) - vcrypto->vqs[i].dataq = - virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); - vcrypto->vqs[i].dataq_bh = -- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); -+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], -+ &dev->mem_reentrancy_guard); - vcrypto->vqs[i].vcrypto = vcrypto; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch b/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch deleted file mode 100644 index efa966e..0000000 --- a/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 29 May 2023 14:21:08 -0400 -Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI - controller (CVE-2023-0330) - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit b987718bbb1d0eabf95499b976212dd5f0120d75 -Author: Thomas Huth -Date: Mon May 22 11:10:11 2023 +0200 - - hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330) - - We cannot use the generic reentrancy guard in the LSI code, so - we have to manually prevent endless reentrancy here. 
The problematic - lsi_execute_script() function has already a way to detect whether - too many instructions have been executed - we just have to slightly - change the logic here that it also takes into account if the function - has been called too often in a reentrant way. - - The code in fuzz-lsi53c895a-test.c has been taken from an earlier - patch by Mauro Matteo Cascella. - - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563 - Message-Id: <20230522091011.1082574-1-thuth@redhat.com> - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Alexander Bulekov - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 23 +++++++++++++++------ - tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++ - 2 files changed, 50 insertions(+), 6 deletions(-) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index 048436352b..f7d45b0b20 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s) - uint32_t addr, addr_high; - int opcode; - int insn_processed = 0; -+ static int reentrancy_level; -+ -+ reentrancy_level++; - - s->istat1 |= LSI_ISTAT1_SRUN; - again: -- if (++insn_processed > LSI_MAX_INSN) { -- /* Some windows drivers make the device spin waiting for a memory -- location to change. If we have been executed a lot of code then -- assume this is the case and force an unexpected device disconnect. -- This is apparently sufficient to beat the drivers into submission. -- */ -+ /* -+ * Some windows drivers make the device spin waiting for a memory location -+ * to change. If we have executed more than LSI_MAX_INSN instructions then -+ * assume this is the case and force an unexpected device disconnect. This -+ * is apparently sufficient to beat the drivers into submission. -+ * -+ * Another issue (CVE-2023-0330) can occur if the script is programmed to -+ * trigger itself again and again. 
Avoid this problem by stopping after -+ * being called multiple times in a reentrant way (8 is an arbitrary value -+ * which should be enough for all valid use cases). -+ */ -+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) { - if (!(s->sien0 & LSI_SIST0_UDC)) { - qemu_log_mask(LOG_GUEST_ERROR, - "lsi_scsi: inf. loop with UDC masked"); -@@ -1596,6 +1605,8 @@ again: - } - } - trace_lsi_execute_script_stop(); -+ -+ reentrancy_level--; - } - - static uint8_t lsi_reg_readb(LSIState *s, int offset) -diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c -index 2012bd54b7..1b55928b9f 100644 ---- a/tests/qtest/fuzz-lsi53c895a-test.c -+++ b/tests/qtest/fuzz-lsi53c895a-test.c -@@ -8,6 +8,36 @@ - #include "qemu/osdep.h" - #include "libqtest.h" - -+/* -+ * This used to trigger a DMA reentrancy issue -+ * leading to memory corruption bugs like stack -+ * overflow or use-after-free -+ * https://gitlab.com/qemu-project/qemu/-/issues/1563 -+ */ -+static void test_lsi_dma_reentrancy(void) -+{ -+ QTestState *s; -+ -+ s = qtest_init("-M q35 -m 512M -nodefaults " -+ "-blockdev driver=null-co,node-name=null0 " -+ "-device lsi53c810 -device scsi-cd,drive=null0"); -+ -+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */ -+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */ -+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */ -+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/ -+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */ -+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/ -+ qtest_writel(s, 0xff000000, 0xc0000024); -+ qtest_writel(s, 0xff000114, 0x00000080); -+ qtest_writel(s, 0xff00012c, 0xff000000); -+ qtest_writel(s, 0xff000004, 0xff000114); -+ qtest_writel(s, 0xff000008, 0xff100014); -+ qtest_writel(s, 0xff10002f, 0x000000ff); -+ -+ qtest_quit(s); -+} -+ - /* - * This used to trigger a UAF in lsi_do_msgout() - * https://gitlab.com/qemu-project/qemu/-/issues/972 -@@ -124,5 +154,8 @@ int main(int argc, char 
**argv) - qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req", - test_lsi_do_msgout_cancel_req); - -+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy", -+ test_lsi_dma_reentrancy); -+ - return g_test_run(); - } --- -2.39.3 - diff --git a/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch b/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch new file mode 100644 index 0000000..2c7f6ff --- /dev/null +++ b/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch @@ -0,0 +1,73 @@ +From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Volker=20R=C3=BCmelin?= +Date: Fri, 29 Dec 2023 21:38:54 +0100 +Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm) + +Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list") +introduced a global VFIODevice list, but forgot to update the list +element field name when iterating over the new list. Change the code +to use the correct list element field. 
+ +Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061 +Signed-off-by: Volker Rümelin +Reviewed-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 0d4d8b8416..0b3352f2a9 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -73,7 +73,7 @@ bool vfio_mig_active(void) + return false; + } + +- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { ++ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->migration_blocker) { + return false; + } +@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void) + unsigned int device_num = 0; + bool all_support_p2p = true; + +- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { ++ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->migration) { + device_num++; + +@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque) + { + VFIODevice *vbasedev; + +- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { ++ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->dev->realized) { + vbasedev->ops->vfio_compute_needs_reset(vbasedev); + } + } + +- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { ++ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->dev->realized && vbasedev->needs_reset) { + vbasedev->ops->vfio_hot_reset_multi(vbasedev); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch deleted file mode 100644 index ffabd75..0000000 --- a/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 
fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0ddcb39c9357 -Author: Alex Williamson -Date: Fri Jun 30 16:36:08 2023 -0600 - - hw/vfio/pci-quirks: Sanitize capability pointer - - Coverity reports a tained scalar when traversing the capabilities - chain (CID 1516589). In practice I've never seen a device with a - chain so broken as to cause an issue, but it's also pretty easy to - sanitize. - - Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques") - Signed-off-by: Alex Williamson - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci-quirks.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 0ed2fcd531..f4ff836805 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { - .set = set_nv_gpudirect_clique_id, - }; - -+static bool is_valid_std_cap_offset(uint8_t pos) -+{ -+ return (pos >= PCI_STD_HEADER_SIZEOF && -+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF)); -+} -+ - static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - { - PCIDevice *pdev = &vdev->pdev; -@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - */ - ret = pread(vdev->vbasedev.fd, &tmp, 1, - vdev->config_offset + 
PCI_CAPABILITY_LIST); -- if (ret != 1 || !tmp) { -+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) { - error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); - return -EINVAL; - } -@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - d4_conflict = true; - } - tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; -- } while (tmp); -+ } while (is_valid_std_cap_offset(tmp)); - - if (!c8_conflict) { - pos = 0xC8; --- -2.39.3 - diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch deleted file mode 100644 index 99f5c75..0000000 --- a/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch +++ /dev/null @@ -1,110 +0,0 @@ -From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for - GPUDirect Cliques -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit f6b30c1984f7 -Author: Alex Williamson -Date: Thu Jun 8 12:05:07 2023 -0600 - - hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques - - NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset - previously reserved for use by hypervisors to implement the GPUDirect - Cliques capability. A revised specification provides an alternate - location. 
Add a config space walk to the quirk to check for conflicts, - allowing us to fall back to the new location or generate an error at the - quirk setup rather than when the real conflicting capability is added - should there be no available location. - - Signed-off-by: Alex Williamson - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 40 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index f0147a050a..0ed2fcd531 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) - * +---------------------------------+---------------------------------+ - * - * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf -+ * -+ * Specification for Turning and later GPU architectures: -+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf - */ - static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v, - const char *name, void *opaque, -@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { - static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - { - PCIDevice *pdev = &vdev->pdev; -- int ret, pos = 0xC8; -+ int ret, pos; -+ bool c8_conflict = false, d4_conflict = false; -+ uint8_t tmp; - - if (vdev->nv_gpudirect_clique == 0xFF) { - return 0; -@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - return -EINVAL; - } - -+ /* -+ * Per the updated specification above, it's recommended to use offset -+ * D4h for Turing and later GPU architectures due to a conflict of the -+ * MSI-X capability at C8h. We don't know how to determine the GPU -+ * architecture, instead we walk the capability chain to mark conflicts -+ * and choose one or error based on the result. -+ * -+ * NB. 
Cap list head in pdev->config is already cleared, read from device. -+ */ -+ ret = pread(vdev->vbasedev.fd, &tmp, 1, -+ vdev->config_offset + PCI_CAPABILITY_LIST); -+ if (ret != 1 || !tmp) { -+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); -+ return -EINVAL; -+ } -+ -+ do { -+ if (tmp == 0xC8) { -+ c8_conflict = true; -+ } else if (tmp == 0xD4) { -+ d4_conflict = true; -+ } -+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; -+ } while (tmp); -+ -+ if (!c8_conflict) { -+ pos = 0xC8; -+ } else if (!d4_conflict) { -+ pos = 0xD4; -+ } else { -+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space"); -+ return -EINVAL; -+ } -+ - ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp); - if (ret < 0) { - error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: "); --- -2.39.3 - diff --git a/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch b/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch deleted file mode 100644 index 7a5963c..0000000 --- a/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 17 Jul 2023 18:21:26 +0200 -Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in - virtio_iommu_handle_command() - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas Huth -RH-Acked-by: Peter Xu -RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -In the virtio_iommu_handle_command() when a PROBE request is handled, -output_size takes a value greater than the tail size and on a subsequent -iteration we can get a stack out-of-band access. Initialize the -output_size on each iteration. - -The issue was found with ASAN. 
Credits to: -Yiming Tao(Zhejiang University) -Gaoning Pan(Zhejiang University) - -Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request") -Signed-off-by: Eric Auger -Reported-by: Mauro Matteo Cascella -Cc: qemu-stable@nongnu.org - -Message-Id: <20230717162126.11693-1-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 421e2a944f..17ce630200 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) - VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); - struct virtio_iommu_req_head head; - struct virtio_iommu_req_tail tail = {}; -- size_t output_size = sizeof(tail), sz; - VirtQueueElement *elem; - unsigned int iov_cnt; - struct iovec *iov; - void *buf = NULL; -+ size_t sz; - - for (;;) { -+ size_t output_size = sizeof(tail); -+ - elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); - if (!elem) { - return; --- -2.39.3 - diff --git a/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch b/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch deleted file mode 100644 index 3ee6b29..0000000 --- a/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch +++ /dev/null @@ -1,52 +0,0 @@ -From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:29:15 -0400 -Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID - 0x8000001F is set - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm) - 
-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8 -Author: Tom Lendacky -Date: Fri Sep 30 10:14:30 2022 -0500 - - i386/cpu: Update how the EBX register of CPUID 0x8000001F is set - - Update the setting of CPUID 0x8000001F EBX to clearly document the ranges - associated with fields being set. - - Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. David Alan Gilbert - Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 839706b430..4ac3046313 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - if (sev_enabled()) { - *eax = 0x2; - *eax |= sev_es_enabled() ? 
0x8 : 0; -- *ebx = sev_get_cbit_position(); -- *ebx |= sev_get_reduced_phys_bits() << 6; -+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ -+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; - default: --- -2.39.3 - diff --git a/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch b/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch deleted file mode 100644 index e9d28d3..0000000 --- a/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:22:55 -0400 -Subject: [PATCH 12/14] i386/sev: Update checks and information related to - reduced-phys-bits - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 8168fed9f84e3128f7628969ae78af49433d5ce7 -Author: Tom Lendacky -Date: Fri Sep 30 10:14:29 2022 -0500 - - i386/sev: Update checks and information related to reduced-phys-bits - - The value of the reduced-phys-bits parameter is propogated to the CPUID - information exposed to the guest. Update the current validation check to - account for the size of the CPUID field (6-bits), ensuring the value is - in the range of 1 to 63. - - Maintain backward compatibility, to an extent, by allowing a value greater - than 1 (so that the previously documented value of 5 still works), but not - allowing anything over 63. - - Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. 
David Alan Gilbert - Message-Id: - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/sev.c | 17 ++++++++++++++--- - 1 file changed, 14 insertions(+), 3 deletions(-) - -diff --git a/target/i386/sev.c b/target/i386/sev.c -index 859e06f6ad..fe2144c038 100644 ---- a/target/i386/sev.c -+++ b/target/i386/sev.c -@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); - host_cbitpos = ebx & 0x3f; - -+ /* -+ * The cbitpos value will be placed in bit positions 5:0 of the EBX -+ * register of CPUID 0x8000001F. No need to verify the range as the -+ * comparison against the host value accomplishes that. -+ */ - if (host_cbitpos != sev->cbitpos) { - error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", - __func__, host_cbitpos, sev->cbitpos); - goto err; - } - -- if (sev->reduced_phys_bits < 1) { -- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1," -- " requested '%d'", __func__, sev->reduced_phys_bits); -+ /* -+ * The reduced-phys-bits value will be placed in bit positions 11:6 of -+ * the EBX register of CPUID 0x8000001F, so verify the supplied value -+ * is in the range of 1 to 63. 
-+ */ -+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { -+ error_setg(errp, "%s: reduced_phys_bits check failed," -+ " it should be in the range of 1 to 63, requested '%d'", -+ __func__, sev->reduced_phys_bits); - goto err; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch b/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch new file mode 100644 index 0000000..0cf782e --- /dev/null +++ b/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch @@ -0,0 +1,54 @@ +From 51b8f29cddb73eb02f91af5f52a205fdd3af6583 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 17 Jan 2024 21:08:59 +0100 +Subject: [PATCH 099/101] include/ui/rect.h: fix qemu_rect_init() + mis-assignment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 216: Fix regression in QEMU's virtio-gpu VNC sessions +RH-Jira: RHEL-21570 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/1] a9d487be04e2c1847b80c479b5cc790af81e3428 (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-21570 + +commit 9d5b42beb6978dc6219d5dc029c9d453c6b8d503 +Author: Elen Avan +Date: Fri Dec 22 22:17:21 2023 +0300 + + include/ui/rect.h: fix qemu_rect_init() mis-assignment + + Signed-off-by: Elen Avan + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050 + Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test" + Cc: qemu-stable@nongnu.org + Reviewed-by: Michael Tokarev + Reviewed-by: Marc-André Lureau + Signed-off-by: Michael Tokarev + +Signed-off-by: Thomas Huth +--- + include/ui/rect.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/ui/rect.h b/include/ui/rect.h +index 94898f92d0..68f05d78a8 100644 +--- a/include/ui/rect.h ++++ b/include/ui/rect.h +@@ -19,7 +19,7 @@ static inline void 
qemu_rect_init(QemuRect *rect, + uint16_t width, uint16_t height) + { + rect->x = x; +- rect->y = x; ++ rect->y = y; + rect->width = width; + rect->height = height; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch b/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch deleted file mode 100644 index 8d6795e..0000000 --- a/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 0306736e3afbe7be99d01e4d70d1a5f2e38c32c2 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 15 Aug 2023 00:08:55 +0000 -Subject: [PATCH] io: remove io watch if TLS channel is closed during handshake -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 315: io: remove io watch if TLS channel is closed during handshake -RH-Bugzilla: 2216504 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 5f23602074b2edde0d445d529f07434bd156202d (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216504 -CVE: CVE-2023-3354 -Upstream: Merged - -commit 10be627d2b5ec2d6b3dce045144aa739eef678b4 -Author: Daniel P. Berrangé -Date: Tue Jun 20 09:45:34 2023 +0100 - - io: remove io watch if TLS channel is closed during handshake - - The TLS handshake make take some time to complete, during which time an - I/O watch might be registered with the main loop. If the owner of the - I/O channel invokes qio_channel_close() while the handshake is waiting - to continue the I/O watch must be removed. Failing to remove it will - later trigger the completion callback which the owner is not expecting - to receive. In the case of the VNC server, this results in a SEGV as - vnc_disconnect_start() tries to shutdown a client connection that is - already gone / NULL. - - CVE-2023-3354 - Reported-by: jiangyegen - Signed-off-by: Daniel P. 
Berrangé - -Signed-off-by: Jon Maloy ---- - include/io/channel-tls.h | 1 + - io/channel-tls.c | 18 ++++++++++++------ - 2 files changed, 13 insertions(+), 6 deletions(-) - -diff --git a/include/io/channel-tls.h b/include/io/channel-tls.h -index 5672479e9e..26c67f17e2 100644 ---- a/include/io/channel-tls.h -+++ b/include/io/channel-tls.h -@@ -48,6 +48,7 @@ struct QIOChannelTLS { - QIOChannel *master; - QCryptoTLSSession *session; - QIOChannelShutdown shutdown; -+ guint hs_ioc_tag; - }; - - /** -diff --git a/io/channel-tls.c b/io/channel-tls.c -index 9805dd0a3f..847d5297c3 100644 ---- a/io/channel-tls.c -+++ b/io/channel-tls.c -@@ -198,12 +198,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc, - } - - trace_qio_channel_tls_handshake_pending(ioc, status); -- qio_channel_add_watch_full(ioc->master, -- condition, -- qio_channel_tls_handshake_io, -- data, -- NULL, -- context); -+ ioc->hs_ioc_tag = -+ qio_channel_add_watch_full(ioc->master, -+ condition, -+ qio_channel_tls_handshake_io, -+ data, -+ NULL, -+ context); - } - } - -@@ -218,6 +219,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc, - QIOChannelTLS *tioc = QIO_CHANNEL_TLS( - qio_task_get_source(task)); - -+ tioc->hs_ioc_tag = 0; - g_free(data); - qio_channel_tls_handshake_task(tioc, task, context); - -@@ -378,6 +380,10 @@ static int qio_channel_tls_close(QIOChannel *ioc, - { - QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); - -+ if (tioc->hs_ioc_tag) { -+ g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove); -+ } -+ - return qio_channel_close(tioc->master, errp); - } - --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch b/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch new file mode 100644 index 0000000..3b2841f --- /dev/null +++ b/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch @@ -0,0 +1,133 @@ +From 65d58819ff7b012e43b5f1da1356b559d3f5a962 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: 
Thu, 14 Mar 2024 17:58:25 +0100 +Subject: [PATCH 3/4] iotests: Add test for reset/AioContext switches with NBD + exports + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/3] 7266acdf85271d157497bfe452aa6eeb58fbe7c6 (kmwolf/centos-qemu-kvm) + +This replicates the scenario in which the bug was reported. +Unfortunately this relies on actually executing a guest (so that the +firmware initialises the virtio-blk device and moves it to its +configured iothread), so this can't make use of the qtest accelerator +like most other test cases. I tried to find a different easy way to +trigger the bug, but couldn't find one. + +Signed-off-by: Kevin Wolf +Message-ID: <20240314165825.40261-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit e8fce34eccf68a32f4ecf2c6f121ff2ac383d6bf) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/tests/iothreads-nbd-export | 66 +++++++++++++++++++ + .../tests/iothreads-nbd-export.out | 19 ++++++ + 2 files changed, 85 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iothreads-nbd-export + create mode 100644 tests/qemu-iotests/tests/iothreads-nbd-export.out + +diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export b/tests/qemu-iotests/tests/iothreads-nbd-export +new file mode 100755 +index 0000000000..037260729c +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-nbd-export +@@ -0,0 +1,66 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Copyright (C) 2024 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. 
++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++# Creator/Owner: Kevin Wolf ++ ++import time ++import qemu ++import iotests ++ ++iotests.script_initialize(supported_fmts=['qcow2'], ++ supported_platforms=['linux']) ++ ++with iotests.FilePath('disk1.img') as path, \ ++ iotests.FilePath('nbd.sock', base_dir=iotests.sock_dir) as nbd_sock, \ ++ qemu.machine.QEMUMachine(iotests.qemu_prog) as vm: ++ ++ img_size = '10M' ++ ++ iotests.log('Preparing disk...') ++ iotests.qemu_img_create('-f', iotests.imgfmt, path, img_size) ++ vm.add_args('-blockdev', f'file,node-name=disk-file,filename={path}') ++ vm.add_args('-blockdev', 'qcow2,node-name=disk,file=disk-file') ++ vm.add_args('-object', 'iothread,id=iothread0') ++ vm.add_args('-device', ++ 'virtio-blk,drive=disk,iothread=iothread0,share-rw=on') ++ ++ iotests.log('Launching VM...') ++ vm.add_args('-accel', 'kvm', '-accel', 'tcg') ++ #vm.add_args('-accel', 'qtest') ++ vm.launch() ++ ++ iotests.log('Exporting to NBD...') ++ iotests.log(vm.qmp('nbd-server-start', ++ addr={'type': 'unix', 'data': {'path': nbd_sock}})) ++ iotests.log(vm.qmp('block-export-add', type='nbd', id='exp0', ++ node_name='disk', writable=True)) ++ ++ iotests.log('Connecting qemu-img...') ++ qemu_io = iotests.QemuIoInteractive('-f', 'raw', ++ f'nbd+unix:///disk?socket={nbd_sock}') ++ ++ iotests.log('Moving the NBD export to a different iothread...') ++ for i in range(0, 10): ++ iotests.log(vm.qmp('system_reset')) ++ time.sleep(0.1) ++ ++ iotests.log('Checking that it is still alive...') ++ iotests.log(vm.qmp('query-status')) ++ ++ qemu_io.close() ++ vm.shutdown() +diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export.out 
b/tests/qemu-iotests/tests/iothreads-nbd-export.out +new file mode 100644 +index 0000000000..bc514e35e5 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-nbd-export.out +@@ -0,0 +1,19 @@ ++Preparing disk... ++Launching VM... ++Exporting to NBD... ++{"return": {}} ++{"return": {}} ++Connecting qemu-img... ++Moving the NBD export to a different iothread... ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++Checking that it is still alive... ++{"return": {"running": true, "status": "running"}} +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch b/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch deleted file mode 100644 index 1fc5697..0000000 --- a/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:34 +0200 -Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm) - -This tests exercises graph locking, draining, and graph modifications -with AioContext switches a lot. Amongst others, it serves as a -regression test for bdrv_graph_wrlock() deadlocking because it is called -with a locked AioContext and for AioContext handling in the NBD server. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-4-kwolf@redhat.com> -Tested-by: Eric Blake -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/iotests.py | 4 ++ - .../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++-- - .../tests/graph-changes-while-io.out | 4 +- - 3 files changed, 58 insertions(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 3e82c634cf..7073579a7d 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ - assert self._qmp is not None - return self._qmp.cmd(cmd, args) - -+ def get_qmp(self) -> QEMUMonitorProtocol: -+ assert self._qmp is not None -+ return self._qmp -+ - def stop(self, kill_signal=15): - self._p.send_signal(kill_signal) - self._p.wait() -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io -index 7664f33689..750e7d4d38 100755 ---- a/tests/qemu-iotests/tests/graph-changes-while-io -+++ b/tests/qemu-iotests/tests/graph-changes-while-io -@@ -22,19 +22,19 @@ - import os - from threading import Thread - import iotests --from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ -- QemuStorageDaemon -+from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ -+ QMPTestCase, QemuStorageDaemon - - - top = os.path.join(iotests.test_dir, 'top.img') - nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') - - --def do_qemu_img_bench() -> None: -+def do_qemu_img_bench(count: int = 2000000) -> None: - """ - Do some I/O requests on `nbd_sock`. 
- """ -- qemu_img('bench', '-f', 'raw', '-c', '2000000', -+ qemu_img('bench', '-f', 'raw', '-c', str(count), - f'nbd+unix:///node0?socket={nbd_sock}') - - -@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): - - bench_thr.join() - -+ def test_commit_while_io(self) -> None: -+ # Run qemu-img bench in the background -+ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) -+ bench_thr.start() -+ -+ qemu_io('-c', 'write 0 64k', top) -+ qemu_io('-c', 'write 128k 64k', top) -+ -+ result = self.qsd.qmp('blockdev-add', { -+ 'driver': imgfmt, -+ 'node-name': 'overlay', -+ 'backing': None, -+ 'file': { -+ 'driver': 'file', -+ 'filename': top -+ } -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.qsd.qmp('blockdev-snapshot', { -+ 'node': 'node0', -+ 'overlay': 'overlay', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ # While qemu-img bench is running, repeatedly commit overlay to node0 -+ while bench_thr.is_alive(): -+ result = self.qsd.qmp('block-commit', { -+ 'job-id': 'job0', -+ 'device': 'overlay', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.qsd.qmp('block-job-cancel', { -+ 'device': 'job0', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ cancelled = False -+ while not cancelled: -+ for event in self.qsd.get_qmp().get_events(wait=10.0): -+ if event['event'] != 'JOB_STATUS_CHANGE': -+ continue -+ if event['data']['status'] == 'null': -+ cancelled = True -+ -+ bench_thr.join() -+ - if __name__ == '__main__': - # Format must support raw backing files - iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out -index ae1213e6f8..fbc63e62f8 100644 ---- a/tests/qemu-iotests/tests/graph-changes-while-io.out -+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out -@@ -1,5 +1,5 @@ --. -+.. 
- ---------------------------------------------------------------------- --Ran 1 tests -+Ran 2 tests - - OK --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch b/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch deleted file mode 100644 index 4e91505..0000000 --- a/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 9 May 2023 15:41:33 +0200 -Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm) - -This tests that trying to resize an image with QMP block_resize doesn't -hang or otherwise fail when the image is attached to a device running in -an iothread. - -This is a regression test for the recent fix that changed -qmp_block_resize, which is a coroutine based QMP handler, to avoid -calling no_coroutine_fns directly. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230509134133.373408-1-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++ - tests/qemu-iotests/tests/iothreads-resize.out | 11 +++ - 2 files changed, 82 insertions(+) - create mode 100755 tests/qemu-iotests/tests/iothreads-resize - create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out - -diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize -new file mode 100755 -index 0000000000..36e4598c62 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iothreads-resize -@@ -0,0 +1,71 @@ -+#!/usr/bin/env bash -+# group: rw auto quick -+# -+# Test resizing an image that is attached to a separate iothread -+# -+# Copyright (C) 2023 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=kwolf@redhat.com -+ -+seq=`basename $0` -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. 
./common.filter -+ -+# Resizing images is only supported by a few block drivers -+_supported_fmt raw qcow2 qed -+_supported_proto file -+_require_devices virtio-scsi-pci -+ -+size=64M -+_make_test_img $size -+ -+qmp() { -+cat < -Date: Thu, 11 May 2023 13:03:22 +0200 -Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not - deprecated in RHEL - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm) - -This is a downstream-only patch that is necessary because the default -CPU in RHEL is marked as deprecated. This makes test cases fail due to -the warning in the output: - -qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max' - -Fixes: 318178778db60b6475d1484509bee136317156d3 -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/testenv.py | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py -index 9a37ad9152..963514aab3 100644 ---- a/tests/qemu-iotests/testenv.py -+++ b/tests/qemu-iotests/testenv.py -@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, - if self.qemu_prog.endswith(f'qemu-system-{suffix}'): - self.qemu_options += f' -machine {machine}' - -+ if self.qemu_prog.endswith('qemu-system-x86_64'): -+ self.qemu_options += ' -cpu Nehalem' -+ - # QEMU_DEFAULT_MACHINE - self.qemu_default_machine = get_default_machine(self.qemu_prog) - --- -2.39.1 - diff --git a/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch b/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch new file mode 100644 index 0000000..d9072f5 --- /dev/null +++ b/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch @@ -0,0 +1,49 @@ +From 
a9be663beaace1c31d75ca353e5d3bb0657a4f6c Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 18 Jan 2024 09:48:21 -0500 +Subject: [PATCH 11/22] iotests: add filter_qmp_generated_node_ids() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [7/17] 8dd20acc5b1e992294ed422e80897a9c221940dd (stefanha/centos-stream-qemu-kvm) + +Add a filter function for QMP responses that contain QEMU's +automatically generated node ids. The ids change between runs and must +be masked in the reference output. + +The next commit will use this new function. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240118144823.1497953-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit da62b507a20510d819bcfbe8f5e573409b954006) +Signed-off-by: Stefan Hajnoczi +--- + tests/qemu-iotests/iotests.py | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index e5c5798c71..ea48af4a7b 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -651,6 +651,13 @@ def _filter(_key, value): + def filter_generated_node_ids(msg): + return re.sub("#block[0-9]+", "NODE_NAME", msg) + ++def filter_qmp_generated_node_ids(qmsg): ++ def _filter(_key, value): ++ if is_str(value): ++ return filter_generated_node_ids(value) ++ return value ++ return filter_qmp(qmsg, _filter) ++ + def filter_img_info(output: str, filename: str, + drop_child_info: bool = True) -> str: + lines = [] +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch b/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch new file mode 100644 index 0000000..ab63004 --- /dev/null +++ b/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch @@ -0,0 +1,49 @@ +From 
453da839a7d81896d03b827a95c1991a60740dc5 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Jan 2024 16:21:50 +0100 +Subject: [PATCH 21/22] iotests/iothreads-stream: Use the right TimeoutError + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [17/17] ca5a512ccccb668089b726d7499562d1e294c828 (stefanha/centos-stream-qemu-kvm) + +Since Python 3.11 asyncio.TimeoutError is an alias for TimeoutError, but +in older versions it's not. We really have to catch asyncio.TimeoutError +here, otherwise a slow test run will fail (as has happened multiple +times on CI recently). + +Signed-off-by: Kevin Wolf +Message-ID: <20240125152150.42389-1-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c9c0b37ff4c11b712b21efabe8e5381d223d0295) +Signed-off-by: Stefan Hajnoczi +--- + tests/qemu-iotests/tests/iothreads-stream | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/tests/iothreads-stream b/tests/qemu-iotests/tests/iothreads-stream +index 503f221f16..231195b5e8 100755 +--- a/tests/qemu-iotests/tests/iothreads-stream ++++ b/tests/qemu-iotests/tests/iothreads-stream +@@ -18,6 +18,7 @@ + # + # Creator/Owner: Kevin Wolf + ++import asyncio + import iotests + + iotests.script_initialize(supported_fmts=['qcow2'], +@@ -69,6 +70,6 @@ with iotests.FilePath('disk1.img') as base1_path, \ + # The test is done once both jobs are gone + if finished == 2: + break +- except TimeoutError: ++ except asyncio.TimeoutError: + pass + vm.cmd('query-jobs') +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-iov-padding-New-test.patch b/SOURCES/kvm-iotests-iov-padding-New-test.patch deleted file mode 100644 index 9ef37a2..0000000 --- a/SOURCES/kvm-iotests-iov-padding-New-test.patch +++ /dev/null @@ -1,186 +0,0 @@ -From add833b5de202d6765dda56c8773985fbe7f40a6 Mon Sep 17 00:00:00 2001 -From: 
Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:18 +0200 -Subject: [PATCH 4/9] iotests/iov-padding: New test - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s) - -Test that even vectored IO requests with 1024 vector elements that are -not aligned to the device's request alignment will succeed. - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-5-hreitz@redhat.com> -(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++ - tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++ - 2 files changed, 144 insertions(+) - create mode 100755 tests/qemu-iotests/tests/iov-padding - create mode 100644 tests/qemu-iotests/tests/iov-padding.out - -diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding -new file mode 100755 -index 0000000000..b9604900c7 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iov-padding -@@ -0,0 +1,85 @@ -+#!/usr/bin/env bash -+# group: rw quick -+# -+# Check the interaction of request padding (to fit alignment restrictions) with -+# vectored I/O from the guest -+# -+# Copyright Red Hat -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+seq=$(basename $0) -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt raw -+_supported_proto file -+ -+_make_test_img 1M -+ -+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG" -+ -+# Four combinations: -+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k -+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not -+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned -+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not -+for start_offset in 4096 512; do -+ for last_element_length in 512 4096; do -+ length=$((1023 * 512 + $last_element_length)) -+ -+ echo -+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) ==" -+ -+ # Fill with data for testing -+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io -+ -+ # 1023 512-byte buffers, and then one with length $last_element_length -+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length" -+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ -+ -c "writev $cmd_params" \ -+ --image-opts \ -+ "$IMGSPEC" \ -+ | _filter_qemu_io -+ -+ # Read all patterns -- read the part we just wrote with writev twice, -+ # once "normally", and once with a readv, so we see that that works, too -+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ -+ -c "read -P 1 0 $start_offset" \ -+ -c "read -P 2 $start_offset $length" \ -+ -c "readv $cmd_params" \ -+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \ -+ --image-opts \ -+ "$IMGSPEC" \ -+ | _filter_qemu_io -+ done 
-+done -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out -new file mode 100644 -index 0000000000..e07a91fac7 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iov-padding.out -@@ -0,0 +1,59 @@ -+QA output created by iov-padding -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 -+ -+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 4096/4096 bytes at offset 0 -+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 520192/520192 bytes at offset 528384 -+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 4096/4096 bytes at offset 0 -+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 516608/516608 bytes at offset 531968 -+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 512; length: 524288) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 
524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 523776/523776 bytes at offset 524800 -+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 512; length: 527872) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 520192/520192 bytes at offset 528384 -+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+*** done --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch b/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch new file mode 100644 index 0000000..209bd1e --- /dev/null +++ b/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch @@ -0,0 +1,592 @@ +From 70efc3bbf1f7d7b1b0c2475d9ce3bb70cc9d1cc7 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 18 Jan 2024 09:48:22 -0500 +Subject: [PATCH 12/22] iotests: port 141 to Python for reliable QMP testing + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [8/17] 0783f536508916feac4b4c39e41c22c24a2e52e7 
(stefanha/centos-stream-qemu-kvm) + +The common.qemu bash functions allow tests to interact with the QMP +monitor of a QEMU process. I spent two days trying to update 141 when +the order of the test output changed, but found it would still fail +occassionally because printf() and QMP events race with synchronous QMP +communication. + +I gave up and ported 141 to the existing Python API for QMP tests. The +Python API is less affected by the order in which QEMU prints output +because it does not print all QMP traffic by default. + +The next commit changes the order in which QMP messages are received. +Make 141 reliable first. + +Cc: Hanna Czenczek +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240118144823.1497953-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 9ee2dd4c22a3639c5462b3fc20df60c005c3de64) +Signed-off-by: Stefan Hajnoczi +--- + tests/qemu-iotests/141 | 307 ++++++++++++++++--------------------- + tests/qemu-iotests/141.out | 200 ++++++------------------ + 2 files changed, 176 insertions(+), 331 deletions(-) + +diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 +index a37030ee17..a7d3985a02 100755 +--- a/tests/qemu-iotests/141 ++++ b/tests/qemu-iotests/141 +@@ -1,9 +1,12 @@ +-#!/usr/bin/env bash ++#!/usr/bin/env python3 + # group: rw auto quick + # + # Test case for ejecting BDSs with block jobs still running on them + # +-# Copyright (C) 2016 Red Hat, Inc. ++# Originally written in bash by Hanna Czenczek, ported to Python by Stefan ++# Hajnoczi. ++# ++# Copyright Red Hat + # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by +@@ -19,177 +22,129 @@ + # along with this program. If not, see . + # + +-# creator +-owner=hreitz@redhat.com +- +-seq="$(basename $0)" +-echo "QA output created by $seq" +- +-status=1 # failure is the default! 
+- +-_cleanup() +-{ +- _cleanup_qemu +- _cleanup_test_img +- for img in "$TEST_DIR"/{b,m,o}.$IMGFMT; do +- _rm_test_img "$img" +- done +-} +-trap "_cleanup; exit \$status" 0 1 2 3 15 +- +-# get standard environment, filters and checks +-. ./common.rc +-. ./common.filter +-. ./common.qemu +- +-# Needs backing file and backing format support +-_supported_fmt qcow2 qed +-_supported_proto file +-_supported_os Linux +- +- +-test_blockjob() +-{ +- _send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': '$IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': '$TEST_IMG' +- }}}" \ +- 'return' +- +- # If "$2" is an event, we may or may not see it before the +- # {"return": {}}. Therefore, filter the {"return": {}} out both +- # here and in the next command. (Naturally, if we do not see it +- # here, we will see it before the next command can be executed, +- # so it will appear in the next _send_qemu_cmd's output.) +- _send_qemu_cmd $QEMU_HANDLE \ +- "$1" \ +- "$2" \ +- | _filter_img_create | _filter_qmp_empty_return +- +- # We want this to return an error because the block job is still running +- _send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}}" \ +- 'error' | _filter_generated_node_ids | _filter_qmp_empty_return +- +- _send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}}" \ +- "$3" +- +- _send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}}" \ +- 'return' +-} +- +- +-TEST_IMG="$TEST_DIR/b.$IMGFMT" _make_test_img 1M +-TEST_IMG="$TEST_DIR/m.$IMGFMT" _make_test_img -b "$TEST_DIR/b.$IMGFMT" -F $IMGFMT 1M +-_make_test_img -b "$TEST_DIR/m.$IMGFMT" 1M -F $IMGFMT +- +-_launch_qemu -nodefaults +- +-_send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'qmp_capabilities'}" \ +- 'return' +- +-echo +-echo '=== Testing drive-backup ===' +-echo +- +-# drive-backup will not send 
BLOCK_JOB_READY by itself, and cancelling the job +-# will consequently result in BLOCK_JOB_CANCELLED being emitted. +- +-test_blockjob \ +- "{'execute': 'drive-backup', +- 'arguments': {'job-id': 'job0', +- 'device': 'drv0', +- 'target': '$TEST_DIR/o.$IMGFMT', +- 'format': '$IMGFMT', +- 'sync': 'none'}}" \ +- 'return' \ +- '"status": "null"' +- +-echo +-echo '=== Testing drive-mirror ===' +-echo +- +-# drive-mirror will send BLOCK_JOB_READY basically immediately, and cancelling +-# the job will consequently result in BLOCK_JOB_COMPLETED being emitted. +- +-test_blockjob \ +- "{'execute': 'drive-mirror', +- 'arguments': {'job-id': 'job0', +- 'device': 'drv0', +- 'target': '$TEST_DIR/o.$IMGFMT', +- 'format': '$IMGFMT', +- 'sync': 'none'}}" \ +- 'BLOCK_JOB_READY' \ +- '"status": "null"' +- +-echo +-echo '=== Testing active block-commit ===' +-echo +- +-# An active block-commit will send BLOCK_JOB_READY basically immediately, and +-# cancelling the job will consequently result in BLOCK_JOB_COMPLETED being +-# emitted. +- +-test_blockjob \ +- "{'execute': 'block-commit', +- 'arguments': {'job-id': 'job0', 'device': 'drv0'}}" \ +- 'BLOCK_JOB_READY' \ +- '"status": "null"' +- +-echo +-echo '=== Testing non-active block-commit ===' +-echo +- +-# Give block-commit something to work on, otherwise it would be done +-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just +-# fine without the block job still running. +- +-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/m.$IMGFMT" | _filter_qemu_io +- +-test_blockjob \ +- "{'execute': 'block-commit', +- 'arguments': {'job-id': 'job0', +- 'device': 'drv0', +- 'top': '$TEST_DIR/m.$IMGFMT', +- 'speed': 1}}" \ +- 'return' \ +- '"status": "null"' +- +-echo +-echo '=== Testing block-stream ===' +-echo +- +-# Give block-stream something to work on, otherwise it would be done +-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just +-# fine without the block job still running. 
+- +-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/b.$IMGFMT" | _filter_qemu_io +- +-# With some data to stream (and @speed set to 1), block-stream will not complete +-# until we send the block-job-cancel command. +- +-test_blockjob \ +- "{'execute': 'block-stream', +- 'arguments': {'job-id': 'job0', +- 'device': 'drv0', +- 'speed': 1}}" \ +- 'return' \ +- '"status": "null"' +- +-_cleanup_qemu +- +-# success, all done +-echo "*** done" +-rm -f $seq.full +-status=0 ++import iotests ++ ++# Common filters to mask values that vary in the test output ++QMP_FILTERS = [iotests.filter_qmp_testfiles, \ ++ iotests.filter_qmp_imgfmt] ++ ++ ++class TestCase: ++ def __init__(self, name, vm, image_path, cancel_event): ++ self.name = name ++ self.vm = vm ++ self.image_path = image_path ++ self.cancel_event = cancel_event ++ ++ def __enter__(self): ++ iotests.log(f'=== Testing {self.name} ===') ++ self.vm.qmp_log('blockdev-add', \ ++ node_name='drv0', \ ++ driver=iotests.imgfmt, \ ++ file={'driver': 'file', 'filename': self.image_path}, \ ++ filters=QMP_FILTERS) ++ ++ def __exit__(self, *exc_details): ++ # This is expected to fail because the job still exists ++ self.vm.qmp_log('blockdev-del', node_name='drv0', \ ++ filters=[iotests.filter_qmp_generated_node_ids]) ++ ++ self.vm.qmp_log('block-job-cancel', device='job0') ++ event = self.vm.event_wait(self.cancel_event) ++ iotests.log(event, filters=[iotests.filter_qmp_event]) ++ ++ # This time it succeeds ++ self.vm.qmp_log('blockdev-del', node_name='drv0') ++ ++ # Separate test cases in output ++ iotests.log('') ++ ++ ++def main() -> None: ++ with iotests.FilePath('bottom', 'middle', 'top', 'target') as \ ++ (bottom_path, middle_path, top_path, target_path), \ ++ iotests.VM() as vm: ++ ++ iotests.log('Creating bottom <- middle <- top backing file chain...') ++ IMAGE_SIZE='1M' ++ iotests.qemu_img_create('-f', iotests.imgfmt, bottom_path, IMAGE_SIZE) ++ iotests.qemu_img_create('-f', iotests.imgfmt, \ ++ '-F', iotests.imgfmt, \ ++ '-b', 
bottom_path, \ ++ middle_path, \ ++ IMAGE_SIZE) ++ iotests.qemu_img_create('-f', iotests.imgfmt, \ ++ '-F', iotests.imgfmt, \ ++ '-b', middle_path, \ ++ top_path, \ ++ IMAGE_SIZE) ++ ++ iotests.log('Starting VM...') ++ vm.add_args('-nodefaults') ++ vm.launch() ++ ++ # drive-backup will not send BLOCK_JOB_READY by itself, and cancelling ++ # the job will consequently result in BLOCK_JOB_CANCELLED being ++ # emitted. ++ with TestCase('drive-backup', vm, top_path, 'BLOCK_JOB_CANCELLED'): ++ vm.qmp_log('drive-backup', \ ++ job_id='job0', \ ++ device='drv0', \ ++ target=target_path, \ ++ format=iotests.imgfmt, \ ++ sync='none', \ ++ filters=QMP_FILTERS) ++ ++ # drive-mirror will send BLOCK_JOB_READY basically immediately, and ++ # cancelling the job will consequently result in BLOCK_JOB_COMPLETED ++ # being emitted. ++ with TestCase('drive-mirror', vm, top_path, 'BLOCK_JOB_COMPLETED'): ++ vm.qmp_log('drive-mirror', \ ++ job_id='job0', \ ++ device='drv0', \ ++ target=target_path, \ ++ format=iotests.imgfmt, \ ++ sync='none', \ ++ filters=QMP_FILTERS) ++ event = vm.event_wait('BLOCK_JOB_READY') ++ assert event is not None # silence mypy ++ iotests.log(event, filters=[iotests.filter_qmp_event]) ++ ++ # An active block-commit will send BLOCK_JOB_READY basically ++ # immediately, and cancelling the job will consequently result in ++ # BLOCK_JOB_COMPLETED being emitted. ++ with TestCase('active block-commit', vm, top_path, \ ++ 'BLOCK_JOB_COMPLETED'): ++ vm.qmp_log('block-commit', \ ++ job_id='job0', \ ++ device='drv0') ++ event = vm.event_wait('BLOCK_JOB_READY') ++ assert event is not None # silence mypy ++ iotests.log(event, filters=[iotests.filter_qmp_event]) ++ ++ # Give block-commit something to work on, otherwise it would be done ++ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would ++ # work just fine without the block job still running. 
++ iotests.qemu_io(middle_path, '-c', f'write 0 {IMAGE_SIZE}') ++ with TestCase('non-active block-commit', vm, top_path, \ ++ 'BLOCK_JOB_CANCELLED'): ++ vm.qmp_log('block-commit', \ ++ job_id='job0', \ ++ device='drv0', \ ++ top=middle_path, \ ++ speed=1, \ ++ filters=[iotests.filter_qmp_testfiles]) ++ ++ # Give block-stream something to work on, otherwise it would be done ++ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would ++ # work just fine without the block job still running. ++ iotests.qemu_io(bottom_path, '-c', f'write 0 {IMAGE_SIZE}') ++ with TestCase('block-stream', vm, top_path, 'BLOCK_JOB_CANCELLED'): ++ vm.qmp_log('block-stream', \ ++ job_id='job0', \ ++ device='drv0', \ ++ speed=1) ++ ++if __name__ == '__main__': ++ iotests.script_main(main, supported_fmts=['qcow2', 'qed'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out +index 63203d9944..91b7ba50af 100644 +--- a/tests/qemu-iotests/141.out ++++ b/tests/qemu-iotests/141.out +@@ -1,179 +1,69 @@ +-QA output created by 141 +-Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=1048576 +-Formatting 'TEST_DIR/m.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/b.IMGFMT backing_fmt=IMGFMT +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m.IMGFMT backing_fmt=IMGFMT +-{'execute': 'qmp_capabilities'} +-{"return": {}} +- ++Creating bottom <- middle <- top backing file chain... ++Starting VM... 
+ === Testing drive-backup === +- +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'drive-backup', +-'arguments': {'job-id': 'job0', +-'device': 'drv0', +-'target': 'TEST_DIR/o.IMGFMT', +-'format': 'IMGFMT', +-'sync': 'none'}} +-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} ++{"execute": "drive-backup", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}} ++{"return": {}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} +-{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} + + === Testing drive-mirror === +- +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'drive-mirror', +-'arguments': {'job-id': 'job0', +-'device': 'drv0', +-'target': 'TEST_DIR/o.IMGFMT', +-'format': 'IMGFMT', +-'sync': 'none'}} +-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": 
"TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} ++{"execute": "drive-mirror", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}} ++{"return": {}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: mirror"}} +-{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} + + === Testing active block-commit === +- +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 
'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'block-commit', +-'arguments': {'job-id': 'job0', 'device': 'drv0'}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} ++{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0"}} ++{"return": {}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} +-{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, 
"speed": 0, "type": "commit"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} + + === Testing non-active block-commit === +- +-wrote 1048576/1048576 bytes at offset 0 +-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'block-commit', +-'arguments': {'job-id': 'job0', +-'device': 'drv0', +-'top': 'TEST_DIR/m.IMGFMT', +-'speed': 1}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} ++{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1, "top": "TEST_DIR/PID-middle"}} ++{"return": {}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} +-{'execute': 'block-job-cancel', +- 'arguments': 
{'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} + + === Testing block-stream === +- +-wrote 1048576/1048576 bytes at offset 0 +-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'block-stream', +-'arguments': {'job-id': 'job0', +-'device': 'drv0', +-'speed': 1}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} 
++{"execute": "block-stream", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1}} ++{"return": {}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: stream"}} +-{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} +-*** done ++ +-- +2.39.3 + diff --git a/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch b/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch new file mode 100644 index 0000000..fc1c62f --- /dev/null +++ b/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch @@ -0,0 +1,105 @@ +From 4ab25b33831fa207500179bd30f29388d81e4cce Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:10 -0500 +Subject: [PATCH 093/101] job: remove outdated AioContext locking comments + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext 
lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [24/26] 15ff2928be82d6905c22619458487fbb72d6044a (kmwolf/centos-qemu-kvm) + +The AioContext lock no longer exists. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-14-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + include/qemu/job.h | 20 -------------------- + 1 file changed, 20 deletions(-) + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index e502787dd8..9ea98b5927 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -67,8 +67,6 @@ typedef struct Job { + + /** + * The completion function that will be called when the job completes. +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + BlockCompletionFunc *cb; + +@@ -264,9 +262,6 @@ struct JobDriver { + * + * This callback will not be invoked if the job has already failed. + * If it fails, abort and then clean will be called. +- * +- * Called with AioContext lock held, since many callbacs implementations +- * use bdrv_* functions that require to hold the lock. + */ + int (*prepare)(Job *job); + +@@ -277,9 +272,6 @@ struct JobDriver { + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. +- * +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + void (*commit)(Job *job); + +@@ -290,9 +282,6 @@ struct JobDriver { + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. +- * +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + void (*abort)(Job *job); + +@@ -301,9 +290,6 @@ struct JobDriver { + * .commit() or .abort(). 
Regardless of which callback is invoked after + * completion, .clean() will always be called, even if the job does not + * belong to a transaction group. +- * +- * Called with AioContext lock held, since many callbacs implementations +- * use bdrv_* functions that require to hold the lock. + */ + void (*clean)(Job *job); + +@@ -318,17 +304,12 @@ struct JobDriver { + * READY). + * (If the callback is NULL, the job is assumed to terminate + * without I/O.) +- * +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + bool (*cancel)(Job *job, bool force); + + + /** + * Called when the job is freed. +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + void (*free)(Job *job); + }; +@@ -424,7 +405,6 @@ void job_ref_locked(Job *job); + * Release a reference that was previously acquired with job_ref_locked() or + * job_create(). If it's the last reference to the object, it will be freed. + * +- * Takes AioContext lock internally to invoke a job->driver callback. + * Called with job lock held. 
+ */ + void job_unref_locked(Job *job); +-- +2.39.3 + diff --git a/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch b/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch new file mode 100644 index 0000000..3e562b8 --- /dev/null +++ b/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch @@ -0,0 +1,42 @@ +From 9c2eb4ab03903bc084c53ac29b60b8d2121c9fed Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 21 Nov 2023 16:44:19 +0800 +Subject: [PATCH 040/101] kconfig: Activate IOMMUFD for s390x machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [39/67] cf0ebe770b8db5916dd35247618c0a325dc1eaab (eauger1/centos-qemu-kvm) + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 73e2df669335047b542b67d37ade060a6ae40dd8) +Signed-off-by: Eric Auger +--- + hw/s390x/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig +index 4c068d7960..26ad104485 100644 +--- a/hw/s390x/Kconfig ++++ b/hw/s390x/Kconfig +@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO + imply VFIO_CCW + imply WDT_DIAG288 + imply PCIE_DEVICES ++ imply IOMMUFD + select PCI_EXPRESS + select S390_FLIC + select S390_FLIC_KVM if KVM +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch b/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch deleted file mode 100644 index d6a6d73..0000000 --- a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +++ /dev/null @@ -1,160 +0,0 @@ -From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001 -From: Marcelo Tosatti -Date: 
Thu, 29 Jun 2023 14:48:32 -0300 -Subject: [PATCH 5/6] kvm: reuse per-vcpu stats fd to avoid vcpu interruption -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marcelo Tosatti -RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption -RH-Bugzilla: 2218644 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Leonardo Brás -RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644 -Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d - -A regression has been detected in latency testing of KVM guests. -More specifically, it was observed that the cyclictest -numbers inside of an isolated vcpu (running on isolated pcpu) are: - -Where a maximum of 50us is acceptable. - -The implementation of KVM_GET_STATS_FD uses run_on_cpu to query -per vcpu statistics, which interrupts the vcpu (and is unnecessary). - -To fix this, open the per vcpu stats fd on vcpu initialization, -and read from that fd from QEMU's main thread. 
- -Signed-off-by: Marcelo Tosatti -Signed-off-by: Paolo Bonzini ---- - accel/kvm/kvm-all.c | 30 +++++++++++++++--------------- - include/hw/core/cpu.h | 1 + - 2 files changed, 16 insertions(+), 15 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index cf3a88d90e..fa7ca46c66 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) - "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)", - kvm_arch_vcpu_id(cpu)); - } -+ cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ - err: - return ret; - } -@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd - - /* Read stats header */ - kvm_stats_header = &descriptors->kvm_stats_header; -- ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header)); -+ ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0); - if (ret != sizeof(*kvm_stats_header)) { - error_setg(errp, "KVM stats: failed to read stats header: " - "expected %zu actual %zu", -@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd - } - - static void query_stats(StatsResultList **result, StatsTarget target, -- strList *names, int stats_fd, Error **errp) -+ strList *names, int stats_fd, CPUState *cpu, -+ Error **errp) - { - struct kvm_stats_desc *kvm_stats_desc; - struct kvm_stats_header *kvm_stats_header; -@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, - break; - case STATS_TARGET_VCPU: - add_stats_entry(result, STATS_PROVIDER_KVM, -- current_cpu->parent_obj.canonical_path, -+ cpu->parent_obj.canonical_path, - stats_list); - break; - default: -@@ -4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, - add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list); - } - --static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) -+static void 
query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) - { -- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; -- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ int stats_fd = cpu->kvm_vcpu_stats_fd; - Error *local_err = NULL; - - if (stats_fd == -1) { -@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) - return; - } - query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU, -- kvm_stats_args->names, stats_fd, kvm_stats_args->errp); -- close(stats_fd); -+ kvm_stats_args->names, stats_fd, cpu, -+ kvm_stats_args->errp); - } - --static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) -+static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) - { -- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; -- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ int stats_fd = cpu->kvm_vcpu_stats_fd; - Error *local_err = NULL; - - if (stats_fd == -1) { -@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) - } - query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd, - kvm_stats_args->errp); -- close(stats_fd); - } - - static void query_stats_cb(StatsResultList **result, StatsTarget target, -@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, - error_setg_errno(errp, errno, "KVM stats: ioctl failed"); - return; - } -- query_stats(result, target, names, stats_fd, errp); -+ query_stats(result, target, names, stats_fd, NULL, errp); - close(stats_fd); - break; - } -@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, - if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) { - continue; - } -- run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); -+ query_stats_vcpu(cpu, &stats_args); - } - break; - } -@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error 
**errp) - if (first_cpu) { - stats_args.result.schema = result; - stats_args.errp = errp; -- run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); -+ query_stats_schema_vcpu(first_cpu, &stats_args); - } - } -diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 397fd3ac68..ae96be07e7 100644 ---- a/include/hw/core/cpu.h -+++ b/include/hw/core/cpu.h -@@ -399,6 +399,7 @@ struct CPUState { - struct kvm_dirty_gfn *kvm_dirty_gfns; - uint32_t kvm_fetch_index; - uint64_t dirty_pages; -+ int kvm_vcpu_stats_fd; - - /* Use by accel-block: CPU is executing an ioctl() */ - QemuLockCnt in_ioctl_lock; --- -2.39.3 - diff --git a/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch b/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch deleted file mode 100644 index c1100a5..0000000 --- a/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/13] 02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 6d0589e0e6c64b888864a2bf980537be20389264 -Author: Alexander Bulekov -Date: Sat May 6 07:21:45 2023 -0400 - - loongarch: mark loongarch_ipi_iocsr re-entrnacy safe - - loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send - function. As such, mark these MRs re-entrancy-safe. 
- - Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues") - Signed-off-by: Alexander Bulekov - Reviewed-by: Song Gao - Message-Id: <20230506112145.3563708-1-alxndr@bu.edu> - Signed-off-by: Song Gao - -Signed-off-by: Jon Maloy ---- - hw/intc/loongarch_ipi.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c -index aa4bf9eb74..40e98af2ce 100644 ---- a/hw/intc/loongarch_ipi.c -+++ b/hw/intc/loongarch_ipi.c -@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj) - for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) { - memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops, - &lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48); -+ -+ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ -+ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true; -+ - sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]); - - memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops, --- -2.39.3 - diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch deleted file mode 100644 index 359d53f..0000000 --- a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 29 May 2023 14:21:08 -0400 -Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO - region, too - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e -Author: Thomas Huth -Date: Tue May 16 11:05:56 2023 +0200 - - lsi53c895a: disable reentrancy detection for MMIO region, 
too - - While trying to use a SCSI disk on the LSI controller with an - older version of Fedora (25), I'm getting: - - qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34 - - and the SCSI controller is not usable. Seems like we have to - disable the reentrancy checker for the MMIO region, too, to - get this working again. - - The problem could be reproduced it like this: - - ./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \ - -device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \ - -drive if=none,id=d0,file=.../somedisk.qcow2 \ - -cdrom Fedora-Everything-netinst-i386-25-1.3.iso - - Where somedisk.qcow2 is an image that contains already some partitions - and file systems. - - In the boot menu of Fedora, go to - "Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell" - - Then check "dmesg | grep -i 53c" for failure messages, and try to mount - a partition from somedisk.qcow2. - - Message-Id: <20230516090556.553813-1-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index db27872963..048436352b 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) - * re-entrancy guard. 
- */ - s->ram_io.disable_reentrancy_guard = true; -+ s->mmio_io.disable_reentrancy_guard = true; - - address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); - qdev_init_gpio_out(d, &s->ext_irq, 1); --- -2.39.3 - diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch deleted file mode 100644 index e671c92..0000000 --- a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:10 2023 -0400 - - lsi53c895a: disable reentrancy detection for script RAM - - As the code is designed to use the memory APIs to access the script ram, - disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion. - - In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion. 
- - Reported-by: Fiona Ebner - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-6-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index af93557a9a..db27872963 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) - memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, - "lsi-io", 256); - -+ /* -+ * Since we use the address-space API to interact with ram_io, disable the -+ * re-entrancy guard. -+ */ -+ s->ram_io.disable_reentrancy_guard = true; -+ - address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); - qdev_init_gpio_out(d, &s->ext_irq, 1); - --- -2.39.3 - diff --git a/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch b/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch new file mode 100644 index 0000000..5b531f5 --- /dev/null +++ b/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch @@ -0,0 +1,112 @@ +From 633c6a52ac88526534466ae311522fe5447bcf91 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 17 Jan 2024 14:55:54 +0100 +Subject: [PATCH 02/22] memory-device: reintroduce memory region size check + +RH-Author: David Hildenbrand +RH-MergeRequest: 221: memory-device: reintroduce memory region size check +RH-Jira: RHEL-20341 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Igor Mammedov +RH-Commit: [2/2] e9ff2339b0c07c3f48f5834c9c80cd6d4cbc8f71 + +JIRA: https://issues.redhat.com/browse/RHEL-20341 + +We used to check that the memory region size is multiples of the overall +requested address alignment for the device memory address. 
+ +We removed that check, because there are cases (i.e., hv-balloon) where +devices unconditionally request an address alignment that has a very large +alignment (i.e., 32 GiB), but the actual memory device size might not be +multiples of that alignment. + +However, this change: + +(a) allows for some practically impossible DIMM sizes, like "1GB+1 byte". +(b) allows for DIMMs that partially cover hugetlb pages, previously + reported in [1]. + +Both scenarios don't make any sense: we might even waste memory. + +So let's reintroduce that check, but only check that the +memory region size is multiples of the memory region alignment (i.e., +page size, huge page size), but not any additional memory device +requirements communicated using md->get_min_alignment(). + +The following examples now fail again as expected: + +(a) 1M with 2M THP + qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ + -object memory-backend-ram,id=mem1,size=1M \ + -device pc-dimm,id=dimm1,memdev=mem1 + -> backend memory size must be multiple of 0x200000 + +(b) 1G+1byte + + qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ + -object memory-backend-ram,id=mem1,size=1073741825B \ + -device pc-dimm,id=dimm1,memdev=mem1 + -> backend memory size must be multiple of 0x200000 + +(c) Unaligned hugetlb size (2M) + + qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ + -object memory-backend-file,id=mem1,mem-path=/dev/hugepages/tmp,size=511M \ + -device pc-dimm,id=dimm1,memdev=mem1 + -> backend memory size must be multiple of 0x200000 + +(d) Unaligned hugetlb size (1G) + + qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ + -object memory-backend-file,id=mem1,mem-path=/dev/hugepages1G/tmp,size=2047M \ + -device pc-dimm,id=dimm1,memdev=mem1 + -> backend memory size must be multiple of 0x40000000 + +Note that this fix depends on a hv-balloon change to communicate its +additional alignment requirements using get_min_alignment() 
instead of +through the memory region. + +[1] https://lkml.kernel.org/r/f77d641d500324525ac036fe1827b3070de75fc1.1701088320.git.mprivozn@redhat.com + +Message-ID: <20240117135554.787344-3-david@redhat.com> +Reported-by: Zhenyu Zhang +Reported-by: Michal Privoznik +Fixes: eb1b7c4bd413 ("memory-device: Drop size alignment check") +Tested-by: Zhenyu Zhang +Tested-by: Mario Casquero +Reviewed-by: Maciej S. Szmigiero +Signed-off-by: David Hildenbrand +(cherry picked from commit 540a1abbf0b243e4cfb4333c5d30a041f7080ba4) +Signed-off-by: David Hildenbrand +--- + hw/mem/memory-device.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c +index a1b1af26bc..e098585cda 100644 +--- a/hw/mem/memory-device.c ++++ b/hw/mem/memory-device.c +@@ -374,6 +374,20 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, + goto out; + } + ++ /* ++ * We always want the memory region size to be multiples of the memory ++ * region alignment: for example, DIMMs with 1G+1byte size don't make ++ * any sense. Note that we don't check that the size is multiples ++ * of any additional alignment requirements the memory device might ++ * have when it comes to the address in physical address space. 
++ */ ++ if (!QEMU_IS_ALIGNED(memory_region_size(mr), ++ memory_region_get_alignment(mr))) { ++ error_setg(errp, "backend memory size must be multiple of 0x%" ++ PRIx64, memory_region_get_alignment(mr)); ++ return; ++ } ++ + if (legacy_align) { + align = *legacy_align; + } else { +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch b/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch deleted file mode 100644 index d3697dc..0000000 --- a/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch +++ /dev/null @@ -1,150 +0,0 @@ -From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 05/21] memory: prevent dma-reentracy issues - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 -CVE: CVE-2023-0330 - -commit a2e1753b8054344f32cf94f31c6399a58794a380 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:06 2023 -0400 - - memory: prevent dma-reentracy issues - - Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. - This flag is set/checked prior to calling a device's MemoryRegion - handlers, and set when device code initiates DMA. The purpose of this - flag is to prevent two types of DMA-based reentrancy issues: - - 1.) mmio -> dma -> mmio case - 2.) bh -> dma write -> mmio case - - These issues have led to problems such as stack-exhaustion and - use-after-frees. 
- - Summary of the problem from Peter Maydell: - https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com - - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 - Resolves: CVE-2023-0330 - - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> - [thuth: Replace warn_report() with warn_report_once()] - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - include/exec/memory.h | 5 +++++ - include/hw/qdev-core.h | 7 +++++++ - softmmu/memory.c | 16 ++++++++++++++++ - 3 files changed, 28 insertions(+) - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 15ade918ba..e45ce6061f 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -767,6 +767,8 @@ struct MemoryRegion { - bool is_iommu; - RAMBlock *ram_block; - Object *owner; -+ /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ -+ DeviceState *dev; - - const MemoryRegionOps *ops; - void *opaque; -@@ -791,6 +793,9 @@ struct MemoryRegion { - unsigned ioeventfd_nb; - MemoryRegionIoeventfd *ioeventfds; - RamDiscardManager *rdm; /* Only for RAM */ -+ -+ /* For devices designed to perform re-entrant IO into their own IO MRs */ -+ bool disable_reentrancy_guard; - }; - - struct IOMMUMemoryRegion { -diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h -index bd50ad5ee1..7623703943 100644 ---- a/include/hw/qdev-core.h -+++ b/include/hw/qdev-core.h -@@ -162,6 +162,10 @@ struct NamedClockList { - QLIST_ENTRY(NamedClockList) node; - }; - -+typedef struct { -+ bool engaged_in_io; -+} MemReentrancyGuard; -+ - /** - * DeviceState: - * @realized: Indicates whether the device has been fully constructed. -@@ -194,6 +198,9 @@ struct DeviceState { - int alias_required_for_version; - ResettableState reset; - GSList *unplug_blockers; -+ -+ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ -+ MemReentrancyGuard mem_reentrancy_guard; - }; - - struct DeviceListener { -diff --git a/softmmu/memory.c b/softmmu/memory.c -index b1a6cae6f5..b7b3386e9d 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_size_max = 4; - } - -+ /* Do not allow more than one simultaneous access to a device's IO Regions */ -+ if (mr->dev && !mr->disable_reentrancy_guard && -+ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { -+ if (mr->dev->mem_reentrancy_guard.engaged_in_io) { -+ warn_report_once("Blocked re-entrant IO on MemoryRegion: " -+ "%s at addr: 0x%" HWADDR_PRIX, -+ memory_region_name(mr), addr); -+ return MEMTX_ACCESS_ERROR; -+ } -+ mr->dev->mem_reentrancy_guard.engaged_in_io = true; -+ } -+ - /* FIXME: support unaligned access? 
*/ - access_size = MAX(MIN(size, access_size_max), access_size_min); - access_mask = MAKE_64BIT_MASK(0, access_size * 8); -@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_mask, attrs); - } - } -+ if (mr->dev) { -+ mr->dev->mem_reentrancy_guard.engaged_in_io = false; -+ } - return r; - } - -@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, - } - mr->name = g_strdup(name); - mr->owner = owner; -+ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); - mr->ram_block = NULL; - - if (name) { --- -2.39.3 - diff --git a/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch b/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch deleted file mode 100644 index f45abea..0000000 --- a/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 7 Jun 2023 11:45:09 -0400 -Subject: [PATCH 15/21] memory: stricter checks prior to unsetting - engaged_in_io - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3 -Author: Alexander Bulekov -Date: Tue May 16 04:40:02 2023 -0400 - - memory: stricter checks prior to unsetting engaged_in_io - - engaged_in_io could be unset by an MR with re-entrancy checks disabled. - Ensure that only MRs that can set the engaged_in_io flag can unset it. 
- - Signed-off-by: Alexander Bulekov - Message-Id: <20230516084002.3813836-1-alxndr@bu.edu> - Reviewed-by: Darren Kenny - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - softmmu/memory.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/softmmu/memory.c b/softmmu/memory.c -index b7b3386e9d..26424f1d78 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - unsigned access_size; - unsigned i; - MemTxResult r = MEMTX_OK; -+ bool reentrancy_guard_applied = false; - - if (!access_size_min) { - access_size_min = 1; -@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - return MEMTX_ACCESS_ERROR; - } - mr->dev->mem_reentrancy_guard.engaged_in_io = true; -+ reentrancy_guard_applied = true; - } - - /* FIXME: support unaligned access? */ -@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_mask, attrs); - } - } -- if (mr->dev) { -+ if (mr->dev && reentrancy_guard_applied) { - mr->dev->mem_reentrancy_guard.engaged_in_io = false; - } - return r; --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch b/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch deleted file mode 100644 index b94ba7c..0000000 --- a/SOURCES/kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch +++ /dev/null @@ -1,186 +0,0 @@ -From d831672c4f1d41d863823584173452b89e754e26 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 11 Sep 2023 16:10:19 +0200 -Subject: [PATCH 3/4] migration: Add .save_prepare() handler to struct - SaveVMHandlers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled -RH-Bugzilla: 2229868 -RH-Acked-by: Alex Williamson -RH-Acked-by: Peter Xu -RH-Commit: [3/4] 
b3154a736764ae4430561d7f5c298ab4c6ef9e01 - -Bugzilla: https://bugzilla.redhat.com/2229868 - -commit 08fc4cb51774f763dcc6fd74637aa9e00eb6a0ba -Author: Avihai Horon -Date: Wed Sep 6 18:08:51 2023 +0300 - - migration: Add .save_prepare() handler to struct SaveVMHandlers - - Add a new .save_prepare() handler to struct SaveVMHandlers. This handler - is called early, even before migration starts, and can be used by - devices to perform early checks. - - Refactor migrate_init() to be able to return errors and call - .save_prepare() from there. - - Suggested-by: Peter Xu - Signed-off-by: Avihai Horon - Reviewed-by: Peter Xu - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c - context change in migrate_init() due to missing commit - aff3f6606d14 ("migration: Rename ram_counters to mig_stats") - context change in migrate_prepare() due to missing commit - 87c22901094a ("migration: Move migrate_set_block_incremental() - to options.c") - -Signed-off-by: Cédric Le Goater ---- - include/migration/register.h | 5 +++++ - migration/migration.c | 15 +++++++++++++-- - migration/migration.h | 2 +- - migration/savevm.c | 29 ++++++++++++++++++++++++++++- - migration/savevm.h | 1 + - 5 files changed, 48 insertions(+), 4 deletions(-) - -diff --git a/include/migration/register.h b/include/migration/register.h -index 90914f32f5..2b12c6adec 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -20,6 +20,11 @@ typedef struct SaveVMHandlers { - /* This runs inside the iothread lock. */ - SaveStateHandler *save_state; - -+ /* -+ * save_prepare is called early, even before migration starts, and can be -+ * used to perform early checks. 
-+ */ -+ int (*save_prepare)(void *opaque, Error **errp); - void (*save_cleanup)(void *opaque); - int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); - int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); -diff --git a/migration/migration.c b/migration/migration.c -index a85c8936d9..cdaa757e23 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1389,8 +1389,15 @@ bool migration_is_active(MigrationState *s) - s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); - } - --void migrate_init(MigrationState *s) -+int migrate_init(MigrationState *s, Error **errp) - { -+ int ret; -+ -+ ret = qemu_savevm_state_prepare(errp); -+ if (ret) { -+ return ret; -+ } -+ - /* - * Reinitialise all migration state, except - * parameters/capabilities that the user set, and -@@ -1429,6 +1436,8 @@ void migrate_init(MigrationState *s) - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, sizeof(compression_counters)); - migration_reset_vfio_bytes_transferred(); -+ -+ return 0; - } - - int migrate_add_blocker_internal(Error *reason, Error **errp) -@@ -1638,7 +1647,9 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - migrate_set_block_incremental(s, true); - } - -- migrate_init(s); -+ if (migrate_init(s, errp)) { -+ return false; -+ } - - return true; - } -diff --git a/migration/migration.h b/migration/migration.h -index c5b98485e3..cfbe7c390d 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -465,7 +465,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in); - bool migration_is_setup_or_active(int state); - bool migration_is_running(int state); - --void migrate_init(MigrationState *s); -+int migrate_init(MigrationState *s, Error **errp); - bool migration_is_blocked(Error **errp); - /* True if outgoing migration has entered postcopy phase */ - bool migration_in_postcopy(void); -diff --git a/migration/savevm.c b/migration/savevm.c -index 13c1a9afa1..2913563d6e 100644 ---- 
a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1231,6 +1231,30 @@ bool qemu_savevm_state_guest_unplug_pending(void) - return false; - } - -+int qemu_savevm_state_prepare(Error **errp) -+{ -+ SaveStateEntry *se; -+ int ret; -+ -+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { -+ if (!se->ops || !se->ops->save_prepare) { -+ continue; -+ } -+ if (se->ops->is_active) { -+ if (!se->ops->is_active(se->opaque)) { -+ continue; -+ } -+ } -+ -+ ret = se->ops->save_prepare(se->opaque, errp); -+ if (ret < 0) { -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ - void qemu_savevm_state_setup(QEMUFile *f) - { - MigrationState *ms = migrate_get_current(); -@@ -1617,7 +1641,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - return -EINVAL; - } - -- migrate_init(ms); -+ ret = migrate_init(ms, errp); -+ if (ret) { -+ return ret; -+ } - ms->to_dst_file = f; - - qemu_mutex_unlock_iothread(); -diff --git a/migration/savevm.h b/migration/savevm.h -index e894bbc143..74669733dd 100644 ---- a/migration/savevm.h -+++ b/migration/savevm.h -@@ -31,6 +31,7 @@ - - bool qemu_savevm_state_blocked(Error **errp); - void qemu_savevm_non_migratable_list(strList **reasons); -+int qemu_savevm_state_prepare(Error **errp); - void qemu_savevm_state_setup(QEMUFile *f); - bool qemu_savevm_state_guest_unplug_pending(void); - int qemu_savevm_state_resume_prepare(MigrationState *s); --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch b/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch deleted file mode 100644 index 5cba9f4..0000000 --- a/SOURCES/kvm-migration-Add-migration-prefix-to-functions-in-targe.patch +++ /dev/null @@ -1,139 +0,0 @@ -From f053185a7fb9fab2a41c0a5ae4e1a403bc99a9a0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 11 Sep 2023 16:10:19 +0200 -Subject: [PATCH 1/4] migration: Add migration prefix to functions in target.c -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled -RH-Bugzilla: 2229868 -RH-Acked-by: Alex Williamson -RH-Acked-by: Peter Xu -RH-Commit: [1/4] 4594d2035423385690d7f1feb5f2e4c8f0be74f5 - -Bugzilla: https://bugzilla.redhat.com/2229868 - -commit 38c482b4778595ee337761f73ec0730d6c47b404 -Author: Avihai Horon -Date: Wed Sep 6 18:08:48 2023 +0300 - - migration: Add migration prefix to functions in target.c - - The functions in target.c are not static, yet they don't have a proper - migration prefix. Add such prefix. - - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c, migration/savevm.c - context changes in migrate_prepare() and qemu_savevm_state() due - to missing commit aff3f6606d14 ("migration: Rename ram_counters - to mig_stats") - -Signed-off-by: Cédric Le Goater ---- - migration/migration.c | 6 +++--- - migration/migration.h | 4 ++-- - migration/savevm.c | 2 +- - migration/target.c | 8 ++++---- - 4 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 47ad6c43cb..5aa9e5dada 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1021,7 +1021,7 @@ static void fill_source_migration_info(MigrationInfo *info) - populate_time_info(info, s); - populate_ram_info(info, s); - populate_disk_info(info); -- populate_vfio_info(info); -+ migration_populate_vfio_info(info); - break; - case MIGRATION_STATUS_COLO: - info->has_status = true; -@@ -1030,7 +1030,7 @@ static void fill_source_migration_info(MigrationInfo *info) - case MIGRATION_STATUS_COMPLETED: - populate_time_info(info, s); - populate_ram_info(info, s); -- populate_vfio_info(info); -+ migration_populate_vfio_info(info); - break; - case MIGRATION_STATUS_FAILED: - info->has_status = true; -@@ -1638,7 +1638,7 @@ static bool migrate_prepare(MigrationState 
*s, bool blk, bool blk_inc, - */ - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, sizeof(compression_counters)); -- reset_vfio_bytes_transferred(); -+ migration_reset_vfio_bytes_transferred(); - - return true; - } -diff --git a/migration/migration.h b/migration/migration.h -index dfec649af8..c5b98485e3 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -505,8 +505,8 @@ void migration_consume_urgent_request(void); - bool migration_rate_limit(void); - void migration_cancel(const Error *error); - --void populate_vfio_info(MigrationInfo *info); --void reset_vfio_bytes_transferred(void); -+void migration_populate_vfio_info(MigrationInfo *info); -+void migration_reset_vfio_bytes_transferred(void); - void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); - - #endif -diff --git a/migration/savevm.c b/migration/savevm.c -index 83088fc3f8..05db79bfad 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1620,7 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - migrate_init(ms); - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, sizeof(compression_counters)); -- reset_vfio_bytes_transferred(); -+ migration_reset_vfio_bytes_transferred(); - ms->to_dst_file = f; - - qemu_mutex_unlock_iothread(); -diff --git a/migration/target.c b/migration/target.c -index f39c9a8d88..a6ffa9a5ce 100644 ---- a/migration/target.c -+++ b/migration/target.c -@@ -15,7 +15,7 @@ - #endif - - #ifdef CONFIG_VFIO --void populate_vfio_info(MigrationInfo *info) -+void migration_populate_vfio_info(MigrationInfo *info) - { - if (vfio_mig_active()) { - info->vfio = g_malloc0(sizeof(*info->vfio)); -@@ -23,16 +23,16 @@ void populate_vfio_info(MigrationInfo *info) - } - } - --void reset_vfio_bytes_transferred(void) -+void migration_reset_vfio_bytes_transferred(void) - { - vfio_reset_bytes_transferred(); - } - #else --void populate_vfio_info(MigrationInfo *info) -+void 
migration_populate_vfio_info(MigrationInfo *info) - { - } - --void reset_vfio_bytes_transferred(void) -+void migration_reset_vfio_bytes_transferred(void) - { - } - #endif --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Add-switchover-ack-capability.patch b/SOURCES/kvm-migration-Add-switchover-ack-capability.patch deleted file mode 100644 index 399c9ed..0000000 --- a/SOURCES/kvm-migration-Add-switchover-ack-capability.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 8f89d3bc8f226cd038bf88b9fb3ef43b0fb33034 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 10/37] migration: Add switchover ack capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/28] 2f4ca020783bd617eca13b18289fce764279833b (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 6574232fff6a -Author: Avihai Horon -Date: Wed Jun 21 14:11:54 2023 +0300 - - migration: Add switchover ack capability - - Migration downtime estimation is calculated based on bandwidth and - remaining migration data. This assumes that loading of migration data in - the destination takes a negligible amount of time and that downtime - depends only on network speed. - - While this may be true for RAM, it's not necessarily true for other - migrated devices. For example, loading the data of a VFIO device in the - destination might require from the device to allocate resources, prepare - internal data structures and so on. These operations can take a - significant amount of time which can increase migration downtime. 
- - This patch adds a new capability "switchover ack" that prevents the - source from stopping the VM and completing the migration until an ACK - is received from the destination that it's OK to do so. - - This can be used by migrated devices in various ways to reduce downtime. - For example, a device can send initial precopy metadata to pre-allocate - resources in the destination and use this capability to make sure that - the pre-allocation is completed before the source VM is stopped, so it - will have full effect. - - This new capability relies on the return path capability to communicate - from the destination back to the source. - - The actual implementation of the capability will be added in the - following patches. - - Signed-off-by: Avihai Horon - Reviewed-by: Peter Xu - Acked-by: Markus Armbruster - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - qapi/migration.json - re-indent of @switchover-ack to avoid ../qapi/migration.json:482:1: - unexpected de-indent (expected at least 17 spaces) - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 21 +++++++++++++++++++++ - migration/options.h | 1 + - qapi/migration.json | 14 +++++++++++++- - 3 files changed, 35 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index a76984276d..c3df6c6dde 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -182,6 +182,8 @@ Property migration_properties[] = { - DEFINE_PROP_MIG_CAP("x-zero-copy-send", - MIGRATION_CAPABILITY_ZERO_COPY_SEND), - #endif -+ DEFINE_PROP_MIG_CAP("x-switchover-ack", -+ MIGRATION_CAPABILITY_SWITCHOVER_ACK), - - DEFINE_PROP_END_OF_LIST(), - }; -@@ -305,6 +307,13 @@ bool migrate_return_path(void) - return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - -+bool migrate_switchover_ack(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK]; -+} -+ - bool 
migrate_validate_uuid(void) - { - MigrationState *s = migrate_get_current(); -@@ -532,6 +541,18 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - } - } - -+ if (new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK]) { -+ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) { -+ error_setg(errp, "Capability 'switchover-ack' requires capability " -+ "'return-path'"); -+ return false; -+ } -+ -+ /* Disable this capability until it's implemented */ -+ error_setg(errp, "'switchover-ack' is not implemented yet"); -+ return false; -+ } -+ - return true; - } - -diff --git a/migration/options.h b/migration/options.h -index 7b0f7245ad..0fc7be6869 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -47,6 +47,7 @@ bool migrate_postcopy_ram(void); - bool migrate_rdma_pin_all(void); - bool migrate_release_ram(void); - bool migrate_return_path(void); -+bool migrate_switchover_ack(void); - bool migrate_validate_uuid(void); - bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); -diff --git a/qapi/migration.json b/qapi/migration.json -index 2c35b7b9cf..b6a58347cc 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -478,6 +478,18 @@ - # should not affect the correctness of postcopy migration. - # (since 7.1) - # -+# @switchover-ack: If enabled, migration will not stop the source VM -+# and complete the migration until an ACK is received -+# from the destination that it's OK to do so. -+# Exactly when this ACK is sent depends on the -+# migrated devices that use this feature. For -+# example, a device can use it to make sure some of -+# its data is sent and loaded in the destination -+# before doing switchover. This can reduce downtime -+# if devices that support this capability are -+# present. 'return-path' capability must be enabled -+# to use it. (since 8.1) -+# - # Features: - # @unstable: Members @x-colo and @x-ignore-shared are experimental. 
- # -@@ -492,7 +504,7 @@ - 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', - { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, - 'validate-uuid', 'background-snapshot', -- 'zero-copy-send', 'postcopy-preempt'] } -+ 'zero-copy-send', 'postcopy-preempt', 'switchover-ack'] } - - ## - # @MigrationCapabilityStatus: --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch b/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch deleted file mode 100644 index 7c9748b..0000000 --- a/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch +++ /dev/null @@ -1,308 +0,0 @@ -From e2c2910edf90186ca0d7d13c9943caa284e95ea9 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 25 Apr 2023 21:15:14 -0400 -Subject: [PATCH 51/56] migration: Allow postcopy_ram_supported_by_host() to - report err -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [50/50] 08c44affc11c27ddf1aa7ce0dfacbaf5effb80cb (peterx/qemu-kvm) - -Instead of print it to STDERR, bring the error upwards so that it can be -reported via QMP responses. 
- -E.g.: - -{ "execute": "migrate-set-capabilities" , - "arguments": { "capabilities": - [ { "capability": "postcopy-ram", "state": true } ] } } - -{ "error": - { "class": "GenericError", - "desc": "Postcopy is not supported: Host backend files need to be TMPFS - or HUGETLBFS only" } } - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 74c38cf7fd24c60e4f0a90585d17250478260877) -Signed-off-by: Peter Xu ---- - migration/options.c | 8 ++---- - migration/postcopy-ram.c | 60 +++++++++++++++++++++------------------- - migration/postcopy-ram.h | 3 +- - migration/savevm.c | 3 +- - 4 files changed, 39 insertions(+), 35 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 4701c75a4d..e51d667e14 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -302,6 +302,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -+ ERRP_GUARD(); - #ifndef CONFIG_LIVE_BLOCK_MIGRATION - if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { - error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -@@ -327,11 +328,8 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - */ - if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && - runstate_check(RUN_STATE_INMIGRATE) && -- !postcopy_ram_supported_by_host(mis)) { -- /* postcopy_ram_supported_by_host will have emitted a more -- * detailed message -- */ -- error_setg(errp, "Postcopy is not supported"); -+ !postcopy_ram_supported_by_host(mis, errp)) { -+ error_prepend(errp, "Postcopy is not supported: "); - return false; - } - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 0711500036..75aa276bb1 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -283,11 +283,13 @@ static bool request_ufd_features(int ufd, uint64_t features) - return true; - } - --static bool ufd_check_and_apply(int ufd, 
MigrationIncomingState *mis) -+static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis, -+ Error **errp) - { - uint64_t asked_features = 0; - static uint64_t supported_features; - -+ ERRP_GUARD(); - /* - * it's not possible to - * request UFFD_API twice per one fd -@@ -295,7 +297,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - */ - if (!supported_features) { - if (!receive_ufd_features(&supported_features)) { -- error_report("%s failed", __func__); -+ error_setg(errp, "Userfault feature detection failed"); - return false; - } - } -@@ -317,8 +319,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - * userfault file descriptor - */ - if (!request_ufd_features(ufd, asked_features)) { -- error_report("%s failed: features %" PRIu64, __func__, -- asked_features); -+ error_setg(errp, "Failed features %" PRIu64, asked_features); - return false; - } - -@@ -329,7 +330,8 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; - #endif - if (!have_hp) { -- error_report("Userfault on this host does not support huge pages"); -+ error_setg(errp, -+ "Userfault on this host does not support huge pages"); - return false; - } - } -@@ -338,7 +340,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - - /* Callback from postcopy_ram_supported_by_host block iterator. 
- */ --static int test_ramblock_postcopiable(RAMBlock *rb) -+static int test_ramblock_postcopiable(RAMBlock *rb, Error **errp) - { - const char *block_name = qemu_ram_get_idstr(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); -@@ -346,16 +348,18 @@ static int test_ramblock_postcopiable(RAMBlock *rb) - QemuFsType fs; - - if (length % pagesize) { -- error_report("Postcopy requires RAM blocks to be a page size multiple," -- " block %s is 0x" RAM_ADDR_FMT " bytes with a " -- "page size of 0x%zx", block_name, length, pagesize); -+ error_setg(errp, -+ "Postcopy requires RAM blocks to be a page size multiple," -+ " block %s is 0x" RAM_ADDR_FMT " bytes with a " -+ "page size of 0x%zx", block_name, length, pagesize); - return 1; - } - - if (rb->fd >= 0) { - fs = qemu_fd_getfs(rb->fd); - if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { -- error_report("Host backend files need to be TMPFS or HUGETLBFS only"); -+ error_setg(errp, -+ "Host backend files need to be TMPFS or HUGETLBFS only"); - return 1; - } - } -@@ -368,7 +372,7 @@ static int test_ramblock_postcopiable(RAMBlock *rb) - * normally fine since if the postcopy succeeds it gets turned back on at the - * end. 
- */ --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) - { - long pagesize = qemu_real_host_page_size(); - int ufd = -1; -@@ -377,29 +381,27 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - struct uffdio_register reg_struct; - struct uffdio_range range_struct; - uint64_t feature_mask; -- Error *local_err = NULL; - RAMBlock *block; - -+ ERRP_GUARD(); - if (qemu_target_page_size() > pagesize) { -- error_report("Target page size bigger than host page size"); -+ error_setg(errp, "Target page size bigger than host page size"); - goto out; - } - - ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { -- error_report("%s: userfaultfd not available: %s", __func__, -- strerror(errno)); -+ error_setg(errp, "Userfaultfd not available: %s", strerror(errno)); - goto out; - } - - /* Give devices a chance to object */ -- if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) { -- error_report_err(local_err); -+ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, errp)) { - goto out; - } - - /* Version and features check */ -- if (!ufd_check_and_apply(ufd, mis)) { -+ if (!ufd_check_and_apply(ufd, mis, errp)) { - goto out; - } - -@@ -417,7 +419,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - * affect in reality, or we can revisit. - */ - RAMBLOCK_FOREACH(block) { -- if (test_ramblock_postcopiable(block)) { -+ if (test_ramblock_postcopiable(block, errp)) { - goto out; - } - } -@@ -427,7 +429,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - * it was enabled. 
- */ - if (munlockall()) { - error_report("%s: munlockall: %s", __func__, strerror(errno)); -+ error_setg(errp, "munlockall() failed: %s", strerror(errno)); - goto out; - } - -@@ -439,8 +441,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (testarea == MAP_FAILED) { -- error_report("%s: Failed to map test area: %s", __func__, -- strerror(errno)); -+ error_setg(errp, "Failed to map test area: %s", strerror(errno)); - goto out; - } - g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize)); -@@ -450,14 +451,14 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; - - if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) { -- error_report("%s userfault register: %s", __func__, strerror(errno)); -+ error_setg(errp, "UFFDIO_REGISTER failed: %s", strerror(errno)); - goto out; - } - - range_struct.start = (uintptr_t)testarea; - range_struct.len = pagesize; - if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { -- error_report("%s userfault unregister: %s", __func__, strerror(errno)); -+ error_setg(errp, "UFFDIO_UNREGISTER failed: %s", strerror(errno)); - goto out; - } - -@@ -465,8 +466,8 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - (__u64)1 << _UFFDIO_COPY | - (__u64)1 << _UFFDIO_ZEROPAGE; - if ((reg_struct.ioctls & feature_mask) != feature_mask) { -- error_report("Missing userfault map features: %" PRIx64, -- (uint64_t)(~reg_struct.ioctls & feature_mask)); -+ error_setg(errp, "Missing userfault map features: %" PRIx64, -+ (uint64_t)(~reg_struct.ioctls & feature_mask)); - goto out; - } - -@@ -1188,6 +1189,8 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) - - int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - { -+ Error *local_err = NULL; -+ - /* Open the fd for the kernel to give us userfaults */ - mis->userfault_fd = uffd_open(O_CLOEXEC |
O_NONBLOCK); - if (mis->userfault_fd == -1) { -@@ -1200,7 +1203,8 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - * Although the host check already tested the API, we need to - * do the check again as an ABI handshake on the new fd. - */ -- if (!ufd_check_and_apply(mis->userfault_fd, mis)) { -+ if (!ufd_check_and_apply(mis->userfault_fd, mis, &local_err)) { -+ error_report_err(local_err); - return -1; - } - -@@ -1360,7 +1364,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) - { - } - --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) - { - error_report("%s: No OS support", __func__); - return false; -diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h -index b4867a32d5..442ab89752 100644 ---- a/migration/postcopy-ram.h -+++ b/migration/postcopy-ram.h -@@ -14,7 +14,8 @@ - #define QEMU_POSTCOPY_RAM_H - - /* Return true if the host supports everything we need to do postcopy-ram */ --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, -+ Error **errp); - - /* - * Make all of RAM sensitive to accesses to areas that haven't yet been written -diff --git a/migration/savevm.c b/migration/savevm.c -index 9671211339..211eff3a8b 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1753,7 +1753,8 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, - return -EINVAL; - } - -- if (!postcopy_ram_supported_by_host(mis)) { -+ if (!postcopy_ram_supported_by_host(mis, &local_err)) { -+ error_report_err(local_err); - postcopy_state_set(POSTCOPY_INCOMING_NONE); - return -1; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch b/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch deleted file mode 100644 index d1620f0..0000000 --- 
a/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 3691bb5f956e3c60dbf6de183011b31dbc7a7801 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 May 2023 15:52:12 -0500 -Subject: [PATCH 01/56] migration: Attempt disk reactivation in more failure - scenarios - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Acked-by: Kevin Wolf -RH-Commit: [1/1] 5999b747b314641259d3b8809033b057805eed3f (ebblake/centos-qemu-kvm) - -Commit fe904ea824 added a fail_inactivate label, which tries to -reactivate disks on the source after a failure while s->state == -MIGRATION_STATUS_ACTIVE, but didn't actually use the label if -qemu_savevm_state_complete_precopy() failed. This failure to -reactivate is also present in commit 6039dd5b1c (also covering the new -s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring -s->block_inactive is set more reliably). - -Consolidate the two labels back into one - no matter HOW migration is -failed, if there is any chance we can reach vm_start() after having -attempted inactivation, it is essential that we have tried to restart -disks before then. This also makes the cleanup more like -migrate_fd_cancel(). 
- -Suggested-by: Kevin Wolf -Signed-off-by: Eric Blake -Message-Id: <20230502205212.134680-1-eblake@redhat.com> -Acked-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 6dab4c93ecfae48e2e67b984d1032c1e988d3005) -[eblake: downstream migrate_colo() => migrate_colo_enabled()] -Signed-off-by: Eric Blake ---- - migration/migration.c | 24 ++++++++++++++---------- - 1 file changed, 14 insertions(+), 10 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 08007cef4e..99f86bd6c2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3443,6 +3443,11 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -+ /* -+ * Inactivate disks except in COLO, and track that we -+ * have done so in order to remember to reactivate -+ * them if migration fails or is cancelled. -+ */ - s->block_inactive = !migrate_colo_enabled(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, -@@ -3487,13 +3492,13 @@ static void migration_completion(MigrationState *s) - rp_error = await_return_path_close_on_source(s); - trace_migration_return_path_end_after(rp_error); - if (rp_error) { -- goto fail_invalidate; -+ goto fail; - } - } - - if (qemu_file_get_error(s->to_dst_file)) { - trace_migration_completion_file_err(); -- goto fail_invalidate; -+ goto fail; - } - - if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { -@@ -3507,26 +3512,25 @@ static void migration_completion(MigrationState *s) - - return; - --fail_invalidate: -- /* If not doing postcopy, vm_start() will be called: let's regain -- * control on images. 
-- */ -- if (s->state == MIGRATION_STATUS_ACTIVE || -- s->state == MIGRATION_STATUS_DEVICE) { -+fail: -+ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || -+ s->state == MIGRATION_STATUS_DEVICE)) { -+ /* -+ * If not doing postcopy, vm_start() will be called: let's -+ * regain control on images. -+ */ - Error *local_err = NULL; - - qemu_mutex_lock_iothread(); - bdrv_activate_all(&local_err); - if (local_err) { - error_report_err(local_err); -- s->block_inactive = true; - } else { - s->block_inactive = false; - } - qemu_mutex_unlock_iothread(); - } - --fail: - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_FAILED); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cap_set.patch b/SOURCES/kvm-migration-Create-migrate_cap_set.patch deleted file mode 100644 index 33268bb..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cap_set.patch +++ /dev/null @@ -1,93 +0,0 @@ -From d772464e9a51a085e10864b2dc7ffd49991fc23b Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 21:02:42 +0100 -Subject: [PATCH 22/56] migration: Create migrate_cap_set() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [21/50] 5b12f04013cf2d374a869134bb67c938c789e24d (peterx/qemu-kvm) - -And remove the convoluted use of qmp_migrate_set_capabilities() to -enable disable MIGRATION_CAPABILITY_BLOCK. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9eb1109cfba5415dd0b0cb82e80fc5e42fe861b7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 34 ++++++++++++++++------------------ - 1 file changed, 16 insertions(+), 18 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index b745d829a4..18058fb597 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1912,25 +1912,24 @@ void migrate_set_state(int *state, int old_state, int new_state) - } - } - --static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, -- bool state) -+static bool migrate_cap_set(int cap, bool value, Error **errp) - { -- MigrationCapabilityStatus *cap; -- -- cap = g_new0(MigrationCapabilityStatus, 1); -- cap->capability = index; -- cap->state = state; -+ MigrationState *s = migrate_get_current(); -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; - -- return cap; --} -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return false; -+ } - --void migrate_set_block_enabled(bool value, Error **errp) --{ -- MigrationCapabilityStatusList *cap = NULL; -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ new_caps[cap] = value; - -- QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); -- qmp_migrate_set_capabilities(cap, errp); -- qapi_free_MigrationCapabilityStatusList(cap); -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return false; -+ } -+ s->capabilities[cap] = value; -+ return true; - } - - static void migrate_set_block_incremental(MigrationState *s, bool value) -@@ -1942,7 +1941,7 @@ static void block_cleanup_parameters(MigrationState *s) - { - if (s->must_remove_block_options) { - /* setting to false can never fail */ -- migrate_set_block_enabled(false, &error_abort); -+ migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort); - migrate_set_block_incremental(s, false); - s->must_remove_block_options = false; - 
} -@@ -2429,8 +2428,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - "current migration capabilities"); - return false; - } -- migrate_set_block_enabled(true, &local_err); -- if (local_err) { -+ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) { - error_propagate(errp, local_err); - return false; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch b/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch deleted file mode 100644 index 408d258..0000000 --- a/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch +++ /dev/null @@ -1,84 +0,0 @@ -From a17bee3c8ab48daa471ec53bed0e2cb0bb41fc76 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:04:55 +0100 -Subject: [PATCH 41/56] migration: Create migrate_checkpoint_delay() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [40/50] b972d3f12e49dc27aa78eb723ca6d0fac4d174d8 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit f94a858fa3e72ba954a338c01ae9fecc15fcce5c) -Signed-off-by: Peter Xu ---- - migration/colo.c | 5 ++--- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - 3 files changed, 12 insertions(+), 3 deletions(-) - -diff --git a/migration/colo.c b/migration/colo.c -index 93b78c9270..07bfa21fea 100644 ---- a/migration/colo.c -+++ b/migration/colo.c -@@ -576,7 +576,7 @@ static void colo_process_checkpoint(MigrationState *s) - trace_colo_vm_state_change("stop", "run"); - - timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + -- s->parameters.x_checkpoint_delay); -+ migrate_checkpoint_delay()); - - while (s->state == MIGRATION_STATUS_COLO) { - if 
(failover_get_state() != FAILOVER_STATUS_NONE) { -@@ -651,8 +651,7 @@ void colo_checkpoint_notify(void *opaque) - - qemu_event_set(&s->colo_checkpoint_event); - s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); -- next_notify_time = s->colo_checkpoint_time + -- s->parameters.x_checkpoint_delay; -+ next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); - timer_mod(s->colo_delay_timer, next_notify_time); - } - -diff --git a/migration/options.c b/migration/options.c -index b9f3815f7e..0e102e5700 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -472,6 +472,15 @@ bool migrate_block_incremental(void) - return s->parameters.block_incremental; - } - -+uint32_t migrate_checkpoint_delay(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.x_checkpoint_delay; -+} -+ - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index aa54443353..adc2879bbb 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -46,6 +46,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); - /* parameters */ - - bool migrate_block_incremental(void); -+uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch deleted file mode 100644 index 65bad3c..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 7ff430e011780dad00e5ebaad0318c5fa3aec102 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:20:49 +0100 -Subject: [PATCH 45/56] migration: Create migrate_cpu_throttle_increment() - function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [44/50] aec990a106a0347b265f5c056a516e0b91e8183c (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9605c2ac282c565bb00b5f344217161bef29eff8) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index f7fb6999f7..31435d2b45 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) - return s->parameters.compress_wait_thread; - } - -+uint8_t migrate_cpu_throttle_increment(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_increment; -+} -+ - uint8_t migrate_cpu_throttle_initial(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index fd8b91d767..49b29bdafd 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -+uint8_t migrate_cpu_throttle_increment(void); - uint8_t migrate_cpu_throttle_initial(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5e855d5c22..5645745a42 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -713,7 +713,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - { - MigrationState *s = migrate_get_current(); - uint64_t pct_initial = migrate_cpu_throttle_initial(); -- uint64_t pct_increment = s->parameters.cpu_throttle_increment; -+ uint64_t 
pct_increment = migrate_cpu_throttle_increment(); - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; - int pct_max = migrate_max_cpu_throttle(); - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch deleted file mode 100644 index aab2013..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch +++ /dev/null @@ -1,75 +0,0 @@ -From fdc2f14bfb3ef8897310a7db63287a9bab1fb858 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:22:44 +0100 -Subject: [PATCH 44/56] migration: Create migrate_cpu_throttle_initial() to - option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [43/50] e0e0db7218f28aefd4bd022edbaec236e2030cb1 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 2a8ec38082f8098f2693bb3632175453c0c84a51) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index 418aafac64..f7fb6999f7 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) - return s->parameters.compress_wait_thread; - } - -+uint8_t migrate_cpu_throttle_initial(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_initial; -+} -+ - int migrate_decompress_threads(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 72b1a320b7..fd8b91d767 100644 ---- a/migration/options.h 
-+++ b/migration/options.h -@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -+uint8_t migrate_cpu_throttle_initial(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5c786513ef..5e855d5c22 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -712,7 +712,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t bytes_dirty_threshold) - { - MigrationState *s = migrate_get_current(); -- uint64_t pct_initial = s->parameters.cpu_throttle_initial; -+ uint64_t pct_initial = migrate_cpu_throttle_initial(); - uint64_t pct_increment = s->parameters.cpu_throttle_increment; - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; - int pct_max = migrate_max_cpu_throttle(); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch deleted file mode 100644 index e36f003..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch +++ /dev/null @@ -1,78 +0,0 @@ -From b88c51c4b02639e28da73143b1da7bd3d6706ce5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:29:51 +0100 -Subject: [PATCH 46/56] migration: Create migrate_cpu_throttle_tailslow() - function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [45/50] e93e96392405c60f75abbf288e4fddb191bbc996 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 
873f674c559e3162a6e6e92994301d400c5cc873) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 3 +-- - 3 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 31435d2b45..615534c151 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -527,6 +527,15 @@ uint8_t migrate_cpu_throttle_initial(void) - return s->parameters.cpu_throttle_initial; - } - -+bool migrate_cpu_throttle_tailslow(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_tailslow; -+} -+ - int migrate_decompress_threads(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 49b29bdafd..99f6bbd7a1 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -52,6 +52,7 @@ int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - uint8_t migrate_cpu_throttle_increment(void); - uint8_t migrate_cpu_throttle_initial(void); -+bool migrate_cpu_throttle_tailslow(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5645745a42..01356f60a4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -711,10 +711,9 @@ static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f, - static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t bytes_dirty_threshold) - { -- MigrationState *s = migrate_get_current(); - uint64_t pct_initial = migrate_cpu_throttle_initial(); - uint64_t pct_increment = migrate_cpu_throttle_increment(); -- bool pct_tailslow = s->parameters.cpu_throttle_tailslow; -+ bool pct_tailslow = migrate_cpu_throttle_tailslow(); - int pct_max = migrate_max_cpu_throttle(); - - uint64_t throttle_now = cpu_throttle_get_percentage(); --- -2.39.1 - diff --git 
a/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch b/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch deleted file mode 100644 index ba1d34c..0000000 --- a/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch +++ /dev/null @@ -1,232 +0,0 @@ -From b6228b3122f5c1f220f92042277ab1bfbb5ba086 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 11:00:12 +0100 -Subject: [PATCH 48/56] migration: Create migrate_max_bandwidth() function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [47/50] 3874656f70cb9c2a30f4d63e146539480d422326 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9c894df3a37d675652390f7dbbe2f65b7bad7efa) -Signed-off-by: Peter Xu ---- - migration/migration.c | 70 +------------------------------------- - migration/options.c | 79 +++++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 1 + - 3 files changed, 81 insertions(+), 69 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 46a5ea4d42..c2e109329d 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -886,74 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) - migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); - } - --MigrationParameters *qmp_query_migrate_parameters(Error **errp) --{ -- MigrationParameters *params; -- MigrationState *s = migrate_get_current(); -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- params = g_malloc0(sizeof(*params)); -- params->has_compress_level = true; -- params->compress_level = s->parameters.compress_level; -- params->has_compress_threads = true; -- 
params->compress_threads = s->parameters.compress_threads; -- params->has_compress_wait_thread = true; -- params->compress_wait_thread = s->parameters.compress_wait_thread; -- params->has_decompress_threads = true; -- params->decompress_threads = s->parameters.decompress_threads; -- params->has_throttle_trigger_threshold = true; -- params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; -- params->has_cpu_throttle_initial = true; -- params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; -- params->has_cpu_throttle_increment = true; -- params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; -- params->has_cpu_throttle_tailslow = true; -- params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; -- params->tls_creds = g_strdup(s->parameters.tls_creds); -- params->tls_hostname = g_strdup(s->parameters.tls_hostname); -- params->tls_authz = g_strdup(s->parameters.tls_authz ? -- s->parameters.tls_authz : ""); -- params->has_max_bandwidth = true; -- params->max_bandwidth = s->parameters.max_bandwidth; -- params->has_downtime_limit = true; -- params->downtime_limit = s->parameters.downtime_limit; -- params->has_x_checkpoint_delay = true; -- params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; -- params->has_block_incremental = true; -- params->block_incremental = s->parameters.block_incremental; -- params->has_multifd_channels = true; -- params->multifd_channels = s->parameters.multifd_channels; -- params->has_multifd_compression = true; -- params->multifd_compression = s->parameters.multifd_compression; -- params->has_multifd_zlib_level = true; -- params->multifd_zlib_level = s->parameters.multifd_zlib_level; -- params->has_multifd_zstd_level = true; -- params->multifd_zstd_level = s->parameters.multifd_zstd_level; -- params->has_xbzrle_cache_size = true; -- params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; -- params->has_max_postcopy_bandwidth = true; -- params->max_postcopy_bandwidth = 
s->parameters.max_postcopy_bandwidth; -- params->has_max_cpu_throttle = true; -- params->max_cpu_throttle = s->parameters.max_cpu_throttle; -- params->has_announce_initial = true; -- params->announce_initial = s->parameters.announce_initial; -- params->has_announce_max = true; -- params->announce_max = s->parameters.announce_max; -- params->has_announce_rounds = true; -- params->announce_rounds = s->parameters.announce_rounds; -- params->has_announce_step = true; -- params->announce_step = s->parameters.announce_step; -- -- if (s->parameters.has_block_bitmap_mapping) { -- params->has_block_bitmap_mapping = true; -- params->block_bitmap_mapping = -- QAPI_CLONE(BitmapMigrationNodeAliasList, -- s->parameters.block_bitmap_mapping); -- } -- -- return params; --} -- - /* - * Return true if we're already in the middle of a migration - * (i.e. any of the active or setup states) -@@ -3775,7 +3707,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - XFER_LIMIT_RATIO; - } else { - /* This is a fresh new migration */ -- rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; -+ rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO; - - /* Notify before starting migration thread */ - notifier_list_notify(&migration_state_notifiers, s); -diff --git a/migration/options.c b/migration/options.c -index 8bd2d949ae..8e8753d9be 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,8 +12,10 @@ - */ - - #include "qemu/osdep.h" -+#include "qapi/clone-visitor.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" -+#include "qapi/qapi-visit-migration.h" - #include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" - #include "migration/misc.h" -@@ -562,6 +564,15 @@ uint8_t migrate_max_cpu_throttle(void) - return s->parameters.max_cpu_throttle; - } - -+uint64_t migrate_max_bandwidth(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_bandwidth; -+} -+ - int64_t 
migrate_max_postcopy_bandwidth(void) - { - MigrationState *s; -@@ -641,3 +652,71 @@ AnnounceParameters *migrate_announce_params(void) - - return &ap; - } -+ -+MigrationParameters *qmp_query_migrate_parameters(Error **errp) -+{ -+ MigrationParameters *params; -+ MigrationState *s = migrate_get_current(); -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ params = g_malloc0(sizeof(*params)); -+ params->has_compress_level = true; -+ params->compress_level = s->parameters.compress_level; -+ params->has_compress_threads = true; -+ params->compress_threads = s->parameters.compress_threads; -+ params->has_compress_wait_thread = true; -+ params->compress_wait_thread = s->parameters.compress_wait_thread; -+ params->has_decompress_threads = true; -+ params->decompress_threads = s->parameters.decompress_threads; -+ params->has_throttle_trigger_threshold = true; -+ params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; -+ params->has_cpu_throttle_initial = true; -+ params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; -+ params->has_cpu_throttle_increment = true; -+ params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; -+ params->has_cpu_throttle_tailslow = true; -+ params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; -+ params->tls_creds = g_strdup(s->parameters.tls_creds); -+ params->tls_hostname = g_strdup(s->parameters.tls_hostname); -+ params->tls_authz = g_strdup(s->parameters.tls_authz ? 
-+ s->parameters.tls_authz : ""); -+ params->has_max_bandwidth = true; -+ params->max_bandwidth = s->parameters.max_bandwidth; -+ params->has_downtime_limit = true; -+ params->downtime_limit = s->parameters.downtime_limit; -+ params->has_x_checkpoint_delay = true; -+ params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; -+ params->has_block_incremental = true; -+ params->block_incremental = s->parameters.block_incremental; -+ params->has_multifd_channels = true; -+ params->multifd_channels = s->parameters.multifd_channels; -+ params->has_multifd_compression = true; -+ params->multifd_compression = s->parameters.multifd_compression; -+ params->has_multifd_zlib_level = true; -+ params->multifd_zlib_level = s->parameters.multifd_zlib_level; -+ params->has_multifd_zstd_level = true; -+ params->multifd_zstd_level = s->parameters.multifd_zstd_level; -+ params->has_xbzrle_cache_size = true; -+ params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; -+ params->has_max_postcopy_bandwidth = true; -+ params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; -+ params->has_max_cpu_throttle = true; -+ params->max_cpu_throttle = s->parameters.max_cpu_throttle; -+ params->has_announce_initial = true; -+ params->announce_initial = s->parameters.announce_initial; -+ params->has_announce_max = true; -+ params->announce_max = s->parameters.announce_max; -+ params->has_announce_rounds = true; -+ params->announce_rounds = s->parameters.announce_rounds; -+ params->has_announce_step = true; -+ params->announce_step = s->parameters.announce_step; -+ -+ if (s->parameters.has_block_bitmap_mapping) { -+ params->has_block_bitmap_mapping = true; -+ params->block_bitmap_mapping = -+ QAPI_CLONE(BitmapMigrationNodeAliasList, -+ s->parameters.block_bitmap_mapping); -+ } -+ -+ return params; -+} -diff --git a/migration/options.h b/migration/options.h -index 093bc907a1..1b78fa9f3d 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -64,6 +64,7 @@ uint8_t 
migrate_cpu_throttle_initial(void); - bool migrate_cpu_throttle_tailslow(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); -+uint64_t migrate_max_bandwidth(void); - int64_t migrate_max_postcopy_bandwidth(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch b/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch deleted file mode 100644 index 6628b80..0000000 --- a/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch +++ /dev/null @@ -1,88 +0,0 @@ -From f0d4e34b00f66d2336b755a34a1ba226571641c4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:13:01 +0100 -Subject: [PATCH 42/56] migration: Create migrate_max_cpu_throttle() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [41/50] fc7537c06d8e1f53d7bb552661f6ddb0133a978d (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 24155bd0520035d5148c0af5b925932c4d8064a8) -Signed-off-by: Peter Xu ---- - migration/migration.h | 2 -- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 4 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.h b/migration/migration.h -index 86051af132..3ae938b19c 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -451,8 +451,6 @@ bool migrate_postcopy(void); - - int migrate_use_tls(void); - --int migrate_max_cpu_throttle(void); -- - uint64_t ram_get_total_transferred_pages(void); - - /* Sending on the return path - generic and then for each message type */ -diff --git 
a/migration/options.c b/migration/options.c -index 0e102e5700..2cb04fbbd1 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -517,6 +517,15 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - -+uint8_t migrate_max_cpu_throttle(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_cpu_throttle; -+} -+ - int64_t migrate_max_postcopy_bandwidth(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index adc2879bbb..72b1a320b7 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -51,6 +51,7 @@ int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); -+uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); -diff --git a/migration/ram.c b/migration/ram.c -index e82cee97c3..5c786513ef 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -715,7 +715,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t pct_initial = s->parameters.cpu_throttle_initial; - uint64_t pct_increment = s->parameters.cpu_throttle_increment; - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; -- int pct_max = s->parameters.max_cpu_throttle; -+ int pct_max = migrate_max_cpu_throttle(); - - uint64_t throttle_now = cpu_throttle_get_percentage(); - uint64_t cpu_now, cpu_ideal, throttle_inc; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch b/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch deleted file mode 100644 index c7799f1..0000000 --- a/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch +++ /dev/null @@ -1,95 +0,0 @@ -From e4ef0f2cee6cdf2cf4bd225ac9e610f41d66dfcb Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 
22:41:55 +0100 -Subject: [PATCH 32/56] migration: Create migrate_rdma_pin_all() function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [31/50] 206d96d47d9ee73ddc89dd01186560bf62ea5295 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy - ---- - -Fixed missing space after comma (fabiano) - -(cherry picked from commit 17cba690cdd42108369fafe6b07bff09872fbea6) -Signed-off-by: Peter Xu ---- - migration/options.c | 7 +++++++ - migration/options.h | 1 + - migration/rdma.c | 6 +++--- - 3 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 2003e413da..9c9b8e5863 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -138,6 +138,13 @@ bool migrate_postcopy_ram(void) - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } - -+bool migrate_rdma_pin_all(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]; -+} -+ - bool migrate_release_ram(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 316efd1063..25c002b37a 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -30,6 +30,7 @@ bool migrate_pause_before_switchover(void); - bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); -+bool migrate_rdma_pin_all(void); - bool migrate_release_ram(void); - bool migrate_return_path(void); - bool migrate_validate_uuid(void); -diff --git a/migration/rdma.c b/migration/rdma.c -index bf55e2f163..0af5e944f0 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -35,6 +35,7 @@ - #include - #include "trace.h" - #include 
"qom/object.h" -+#include "options.h" - #include - - /* -@@ -4178,8 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, - goto err; - } - -- ret = qemu_rdma_source_init(rdma, -- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ ret = qemu_rdma_source_init(rdma, migrate_rdma_pin_all(), errp); - - if (ret) { - goto err; -@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma_return_path, -- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ migrate_rdma_pin_all(), errp); - - if (ret) { - goto return_path_err; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch b/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch deleted file mode 100644 index 5fc1072..0000000 --- a/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 27862b9d31da6447b60f185cdad95764018c6bc6 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:59:13 +0100 -Subject: [PATCH 40/56] migration: Create migrate_throttle_trigger_threshold() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [39/50] b8af9080c49be3d38bd2784d61289be89c03db3e (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 6499efdb16e5c1288b4c8390d3bf68b313329b8b) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 3 +-- - 3 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 2b6d88b4b9..b9f3815f7e 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -554,6 +554,15 @@ 
int migrate_multifd_zstd_level(void) - return s->parameters.multifd_zstd_level; - } - -+uint8_t migrate_throttle_trigger_threshold(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.throttle_trigger_threshold; -+} -+ - uint64_t migrate_xbzrle_cache_size(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 96d5a8e6e4..aa54443353 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -55,6 +55,7 @@ int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); -+uint8_t migrate_throttle_trigger_threshold(void); - uint64_t migrate_xbzrle_cache_size(void); - - #endif -diff --git a/migration/ram.c b/migration/ram.c -index 4576d0d849..e82cee97c3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1178,8 +1178,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - - static void migration_trigger_throttle(RAMState *rs) - { -- MigrationState *s = migrate_get_current(); -- uint64_t threshold = s->parameters.throttle_trigger_threshold; -+ uint64_t threshold = migrate_throttle_trigger_threshold(); - uint64_t bytes_xfer_period = - stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; - uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-options.c.patch b/SOURCES/kvm-migration-Create-options.c.patch deleted file mode 100644 index ea60202..0000000 --- a/SOURCES/kvm-migration-Create-options.c.patch +++ /dev/null @@ -1,524 +0,0 @@ -From 282634a835f4711c8b501dd76c344058bc399fbd Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 21:18:45 +0100 -Subject: [PATCH 23/56] migration: Create options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures 
for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [22/50] 10c9be528b9fcfae93f1a12fcd09db1a69e58f64 (peterx/qemu-kvm) - -We move there all capabilities helpers from migration.c. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert - ---- - -Following David advise: -- looked through the history, capabilities are newer than 2012, so we - can remove that bit of the header. -- This part is posterior to Anthony. - Original Author is Orit. Once there, - I put myself. Peter Xu also did quite a bit of work here. - Anyone else wants/needs to be there? I didn't search too hard - because nobody asked before to be added. - -What do you think? - -(cherry picked from commit 1f0776f1c03312aad5d6a5f98871240bc3af01e5) -Signed-off-by: Peter Xu ---- - hw/virtio/virtio-balloon.c | 1 + - migration/block-dirty-bitmap.c | 1 + - migration/block.c | 1 + - migration/colo.c | 1 + - migration/meson.build | 1 + - migration/migration.c | 109 +---------------------------- - migration/migration.h | 12 ---- - migration/options.c | 124 +++++++++++++++++++++++++++++++++ - migration/options.h | 32 +++++++++ - migration/postcopy-ram.c | 1 + - migration/ram.c | 1 + - migration/savevm.c | 1 + - migration/socket.c | 1 + - 13 files changed, 166 insertions(+), 120 deletions(-) - create mode 100644 migration/options.c - create mode 100644 migration/options.h - -diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c -index 746f07c4d2..43092aa634 100644 ---- a/hw/virtio/virtio-balloon.c -+++ b/hw/virtio/virtio-balloon.c -@@ -32,6 +32,7 @@ - #include "qemu/error-report.h" - #include "migration/misc.h" - #include "migration/migration.h" -+#include "migration/options.h" - - #include "hw/virtio/virtio-bus.h" - #include "hw/virtio/virtio-access.h" -diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c -index fe73aa94b1..a6ffae0002 100644 ---- 
a/migration/block-dirty-bitmap.c -+++ b/migration/block-dirty-bitmap.c -@@ -79,6 +79,7 @@ - #include "qapi/qapi-visit-migration.h" - #include "qapi/clone-visitor.h" - #include "trace.h" -+#include "options.h" - - #define CHUNK_SIZE (1 << 10) - -diff --git a/migration/block.c b/migration/block.c -index b2497bbd32..4b167fa5cf 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -28,6 +28,7 @@ - #include "migration/vmstate.h" - #include "sysemu/block-backend.h" - #include "trace.h" -+#include "options.h" - - #define BLK_MIG_BLOCK_SIZE (1ULL << 20) - #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS) -diff --git a/migration/colo.c b/migration/colo.c -index 0716e64689..93b78c9270 100644 ---- a/migration/colo.c -+++ b/migration/colo.c -@@ -36,6 +36,7 @@ - #include "sysemu/cpus.h" - #include "sysemu/runstate.h" - #include "net/filter.h" -+#include "options.h" - - static bool vmstate_loading; - static Notifier packets_compare_notifier; -diff --git a/migration/meson.build b/migration/meson.build -index 0d1bb9f96e..480ff6854a 100644 ---- a/migration/meson.build -+++ b/migration/meson.build -@@ -22,6 +22,7 @@ softmmu_ss.add(files( - 'migration.c', - 'multifd.c', - 'multifd-zlib.c', -+ 'options.c', - 'postcopy-ram.c', - 'savevm.c', - 'socket.c', -diff --git a/migration/migration.c b/migration/migration.c -index 18058fb597..66ea55be06 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -63,6 +63,7 @@ - #include "sysemu/cpus.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" -+#include "options.h" - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - -@@ -357,15 +358,6 @@ static void migrate_generate_event(int new_state) - } - } - --static bool migrate_late_block_activate(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; --} -- - /* - * Send a message on the return channel back to the source - * of the 
migration. -@@ -2525,56 +2517,11 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --bool migrate_release_ram(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; --} -- --bool migrate_postcopy_ram(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; --} -- - bool migrate_postcopy(void) - { - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --bool migrate_auto_converge(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; --} -- --bool migrate_zero_blocks(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; --} -- --bool migrate_postcopy_blocktime(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; --} -- - bool migrate_use_compression(void) - { - MigrationState *s; -@@ -2620,33 +2567,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_dirty_bitmaps(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; --} -- --bool migrate_ignore_shared(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; --} -- --bool migrate_validate_uuid(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; --} -- - bool migrate_use_events(void) - { - MigrationState *s; -@@ -2665,15 +2585,6 @@ bool migrate_use_multifd(void) - return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - --bool migrate_pause_before_switchover(void) --{ -- MigrationState *s; -- -- s 
= migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; --} -- - int migrate_multifd_channels(void) - { - MigrationState *s; -@@ -2785,24 +2696,6 @@ bool migrate_use_block_incremental(void) - return s->parameters.block_incremental; - } - --bool migrate_background_snapshot(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; --} -- --bool migrate_postcopy_preempt(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index 04e0860b4e..a25fed6ef0 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,16 +449,7 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --bool migrate_release_ram(void); --bool migrate_postcopy_ram(void); --bool migrate_zero_blocks(void); --bool migrate_dirty_bitmaps(void); --bool migrate_ignore_shared(void); --bool migrate_validate_uuid(void); -- --bool migrate_auto_converge(void); - bool migrate_use_multifd(void); --bool migrate_pause_before_switchover(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); -@@ -487,9 +478,6 @@ int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); - bool migrate_use_events(void); --bool migrate_postcopy_blocktime(void); --bool migrate_background_snapshot(void); --bool migrate_postcopy_preempt(void); - - /* Sending on the return path - generic and then for each message type */ - void migrate_send_rp_shut(MigrationIncomingState *mis, -diff --git a/migration/options.c b/migration/options.c -new file mode 100644 -index 0000000000..88a9a45913 ---- 
/dev/null -+++ b/migration/options.c -@@ -0,0 +1,124 @@ -+/* -+ * QEMU migration capabilities -+ * -+ * Copyright (c) 2012-2023 Red Hat Inc -+ * -+ * Authors: -+ * Orit Wasserman -+ * Juan Quintela -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#include "qemu/osdep.h" -+#include "migration.h" -+#include "options.h" -+ -+bool migrate_auto_converge(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; -+} -+ -+bool migrate_background_snapshot(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; -+} -+ -+bool migrate_dirty_bitmaps(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; -+} -+ -+bool migrate_ignore_shared(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; -+} -+ -+bool migrate_late_block_activate(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; -+} -+ -+bool migrate_pause_before_switchover(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; -+} -+ -+bool migrate_postcopy_blocktime(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; -+} -+ -+bool migrate_postcopy_preempt(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; -+} -+ -+bool migrate_postcopy_ram(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -+} -+ -+bool 
migrate_release_ram(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; -+} -+ -+bool migrate_validate_uuid(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; -+} -+ -+bool migrate_zero_blocks(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; -+} -diff --git a/migration/options.h b/migration/options.h -new file mode 100644 -index 0000000000..0dfa0af245 ---- /dev/null -+++ b/migration/options.h -@@ -0,0 +1,32 @@ -+/* -+ * QEMU migration capabilities -+ * -+ * Copyright (c) 2012-2023 Red Hat Inc -+ * -+ * Authors: -+ * Orit Wasserman -+ * Juan Quintela -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#ifndef QEMU_MIGRATION_OPTIONS_H -+#define QEMU_MIGRATION_OPTIONS_H -+ -+/* capabilities */ -+ -+bool migrate_auto_converge(void); -+bool migrate_background_snapshot(void); -+bool migrate_dirty_bitmaps(void); -+bool migrate_ignore_shared(void); -+bool migrate_late_block_activate(void); -+bool migrate_pause_before_switchover(void); -+bool migrate_postcopy_blocktime(void); -+bool migrate_postcopy_preempt(void); -+bool migrate_postcopy_ram(void); -+bool migrate_release_ram(void); -+bool migrate_validate_uuid(void); -+bool migrate_zero_blocks(void); -+ -+#endif -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index d7b48dd920..0711500036 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -37,6 +37,7 @@ - #include "tls.h" - #include "qemu/userfaultfd.h" - #include "qemu/mmap-alloc.h" -+#include "options.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -diff --git a/migration/ram.c b/migration/ram.c -index 229714045a..912ccd89fa 100644 ---- a/migration/ram.c -+++ 
b/migration/ram.c -@@ -57,6 +57,7 @@ - #include "qemu/iov.h" - #include "multifd.h" - #include "sysemu/runstate.h" -+#include "options.h" - - #include "hw/boards.h" /* for machine_dump_guest_core() */ - -diff --git a/migration/savevm.c b/migration/savevm.c -index 589ef926ab..ebcf571e37 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -67,6 +67,7 @@ - #include "qemu/yank.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" -+#include "options.h" - - const unsigned int postcopy_ram_discard_version; - -diff --git a/migration/socket.c b/migration/socket.c -index e6fdf3c5e1..ebf9ac41af 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -27,6 +27,7 @@ - #include "io/net-listener.h" - #include "trace.h" - #include "postcopy-ram.h" -+#include "options.h" - - struct SocketOutgoingArgs { - SocketAddress *saddr; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch b/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch deleted file mode 100644 index e08e5df..0000000 --- a/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch +++ /dev/null @@ -1,56 +0,0 @@ -From bbe565f7d3b7fe46971e020e9bd8e79dc9ffa69c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 12/37] migration: Enable switchover ack capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/28] c4a7d7d26a97181c9516d133a6610bfa5dcb1d16 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 538ef4fe2f72 -Author: Avihai Horon -Date: Wed Jun 21 14:11:56 2023 +0300 - - migration: Enable switchover ack capability - - Now that switchover ack logic has been implemented, enable the - capability. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Juan Quintela - Reviewed-by: Peter Xu - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index c3df6c6dde..ccd7ef3907 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -547,10 +547,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - "'return-path'"); - return false; - } -- -- /* Disable this capability until it's implemented */ -- error_setg(errp, "'switchover-ack' is not implemented yet"); -- return false; - } - - return true; --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch b/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch deleted file mode 100644 index 26c8437..0000000 --- a/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 2aac64623d8d2d06d248c1bcc71aa13572fc843c Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 14 Apr 2023 10:33:58 -0500 -Subject: [PATCH 1/2] migration: Handle block device inactivation failures - better - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/2] 5ae143c9234f6eee9fc5154944172bcd56975b36 (ebblake/centos-qemu-kvm) - -Consider what happens when performing a migration between two host -machines connected to an NFS server serving multiple block devices to -the guest, when the NFS server becomes unavailable. The migration -attempts to inactivate all block devices on the source (a necessary -step before the destination can take over); but if the NFS server is -non-responsive, the attempt to inactivate can itself fail. 
When that -happens, the destination fails to get the migrated guest (good, -because the source wasn't able to flush everything properly): - - (qemu) qemu-kvm: load of migration failed: Input/output error - -at which point, our only hope for the guest is for the source to take -back control. With the current code base, the host outputs a message, but then appears to resume: - - (qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1) - - (src qemu)info status - VM status: running - -but a second migration attempt now asserts: - - (src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed. - -Whether the guest is recoverable on the source after the first failure -is debatable, but what we do not want is to have qemu itself fail due -to an assertion. It looks like the problem is as follows: - -In migration.c:migration_completion(), the source sets 'inactivate' to -true (since COLO is not enabled), then tries -savevm.c:qemu_savevm_state_complete_precopy() with a request to -inactivate block devices. In turn, this calls -block.c:bdrv_inactivate_all(), which fails when flushing runs up -against the non-responsive NFS server. With savevm failing, we are -now left in a state where some, but not all, of the block devices have -been inactivated; but migration_completion() then jumps to 'fail' -rather than 'fail_invalidate' and skips an attempt to reclaim those -those disks by calling bdrv_activate_all(). Even if we do attempt to -reclaim disks, we aren't taking note of failure there, either. 
- -Thus, we have reached a state where the migration engine has forgotten -all state about whether a block device is inactive, because we did not -set s->block_inactive in enough places; so migration allows the source -to reach vm_start() and resume execution, violating the block layer -invariant that the guest CPUs should not be restarted while a device -is inactive. Note that the code in migration.c:migrate_fd_cancel() -will also try to reactivate all block devices if s->block_inactive was -set, but because we failed to set that flag after the first failure, -the source assumes it has reclaimed all devices, even though it still -has remaining inactivated devices and does not try again. Normally, -qmp_cont() will also try to reactivate all disks (or correctly fail if -the disks are not reclaimable because NFS is not yet back up), but the -auto-resumption of the source after a migration failure does not go -through qmp_cont(). And because we have left the block layer in an -inconsistent state with devices still inactivated, the later migration -attempt is hitting the assertion failure. - -Since it is important to not resume the source with inactive disks, -this patch marks s->block_inactive before attempting inactivation, -rather than after succeeding, in order to prevent any vm_start() until -it has successfully reactivated all devices. 
- -See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982 - -Signed-off-by: Eric Blake -Reviewed-by: Juan Quintela -Acked-by: Lukas Straub -Tested-by: Lukas Straub -Signed-off-by: Juan Quintela -(cherry picked from commit 403d18ae384239876764bbfa111d6cc5dcb673d1) ---- - migration/migration.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index bda4789193..cb0d42c061 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3444,13 +3444,11 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -+ s->block_inactive = inactivate; - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - inactivate); - } -- if (inactivate && ret >= 0) { -- s->block_inactive = true; -- } - } - qemu_mutex_unlock_iothread(); - -@@ -3522,6 +3520,7 @@ fail_invalidate: - bdrv_activate_all(&local_err); - if (local_err) { - error_report_err(local_err); -+ s->block_inactive = true; - } else { - s->block_inactive = false; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch b/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch deleted file mode 100644 index 49b9f12..0000000 --- a/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch +++ /dev/null @@ -1,339 +0,0 @@ -From 387c39f198d94f600be525e363edc7ca916dc261 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 11/37] migration: Implement switchover ack logic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/28] 853e1978f3b9f87942863bba894a0ed908bde6b1 (clegoate/qemu-kvm-c9s) - -Bugzilla: 
https://bugzilla.redhat.com/2192818 - -commit 1b4adb10f898 -Author: Avihai Horon -Date: Wed Jun 21 14:11:55 2023 +0300 - - migration: Implement switchover ack logic - - Implement switchover ack logic. This prevents the source from stopping - the VM and completing the migration until an ACK is received from the - destination that it's OK to do so. - - To achieve this, a new SaveVMHandlers handler switchover_ack_needed() - and a new return path message MIG_RP_MSG_SWITCHOVER_ACK are added. - - The switchover_ack_needed() handler is called during migration setup in - the destination to check if switchover ack is used by the migrated - device. - - When switchover is approved by all migrated devices in the destination - that support this capability, the MIG_RP_MSG_SWITCHOVER_ACK return path - message is sent to the source to notify it that it's OK to do - switchover. - - Signed-off-by: Avihai Horon - Reviewed-by: Peter Xu - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c - context changes due to commit f4584076fc31 ("migration: switch - from .vm_was_running to .vm_old_state") - -Signed-off-by: Cédric Le Goater ---- - include/migration/register.h | 2 ++ - migration/migration.c | 32 +++++++++++++++++++-- - migration/migration.h | 14 ++++++++++ - migration/savevm.c | 54 ++++++++++++++++++++++++++++++++++++ - migration/savevm.h | 1 + - migration/trace-events | 3 ++ - 6 files changed, 104 insertions(+), 2 deletions(-) - -diff --git a/include/migration/register.h b/include/migration/register.h -index a8dfd8fefd..90914f32f5 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -71,6 +71,8 @@ typedef struct SaveVMHandlers { - int (*load_cleanup)(void *opaque); - /* Called when postcopy migration wants to resume from failure */ - int (*resume_prepare)(MigrationState *s, void *opaque); -+ /* Checks if switchover ack should be used. 
Called only in dest */ -+ bool (*switchover_ack_needed)(void *opaque); - } SaveVMHandlers; - - int register_savevm_live(const char *idstr, -diff --git a/migration/migration.c b/migration/migration.c -index 1ac5f19bc2..9bf1caee6c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -76,6 +76,7 @@ enum mig_rp_message_type { - MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ - MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ - MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ -+ MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */ - - MIG_RP_MSG_MAX - }; -@@ -756,6 +757,11 @@ bool migration_has_all_channels(void) - return true; - } - -+int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) -+{ -+ return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL); -+} -+ - /* - * Send a 'SHUT' message on the return channel with the given value - * to indicate that we've finished with the RP. Non-0 value indicates -@@ -1415,6 +1421,7 @@ void migrate_init(MigrationState *s) - s->vm_was_running = false; - s->iteration_initial_bytes = 0; - s->threshold_size = 0; -+ s->switchover_acked = false; - } - - int migrate_add_blocker_internal(Error *reason, Error **errp) -@@ -1731,6 +1738,7 @@ static struct rp_cmd_args { - [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, - [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, - [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, -+ [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, - [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, - }; - -@@ -1969,6 +1977,11 @@ retry: - } - break; - -+ case MIG_RP_MSG_SWITCHOVER_ACK: -+ ms->switchover_acked = true; -+ trace_source_return_path_thread_switchover_acked(); -+ break; -+ - default: - break; - } -@@ -2720,6 +2733,20 @@ static void migration_update_counters(MigrationState *s, - bandwidth, s->threshold_size); - } - -+static bool 
migration_can_switchover(MigrationState *s) -+{ -+ if (!migrate_switchover_ack()) { -+ return true; -+ } -+ -+ /* No reason to wait for switchover ACK if VM is stopped */ -+ if (!runstate_is_running()) { -+ return true; -+ } -+ -+ return s->switchover_acked; -+} -+ - /* Migration thread iteration status */ - typedef enum { - MIG_ITERATE_RESUME, /* Resume current iteration */ -@@ -2735,6 +2762,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) - { - uint64_t must_precopy, can_postcopy; - bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; -+ bool can_switchover = migration_can_switchover(s); - - qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); - uint64_t pending_size = must_precopy + can_postcopy; -@@ -2747,14 +2775,14 @@ static MigIterateState migration_iteration_run(MigrationState *s) - trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy); - } - -- if (!pending_size || pending_size < s->threshold_size) { -+ if ((!pending_size || pending_size < s->threshold_size) && can_switchover) { - trace_migration_thread_low_pending(pending_size); - migration_completion(s); - return MIG_ITERATE_BREAK; - } - - /* Still a significant amount to transfer */ -- if (!in_postcopy && must_precopy <= s->threshold_size && -+ if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover && - qatomic_read(&s->start_postcopy)) { - if (postcopy_start(s)) { - error_report("%s: postcopy failed to start", __func__); -diff --git a/migration/migration.h b/migration/migration.h -index 2b71df8617..e9679f8029 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -204,6 +204,13 @@ struct MigrationIncomingState { - * contains valid information. - */ - QemuMutex page_request_mutex; -+ -+ /* -+ * Number of devices that have yet to approve switchover. When this reaches -+ * zero an ACK that it's OK to do switchover is sent to the source. No lock -+ * is needed as this field is updated serially. 
-+ */ -+ unsigned int switchover_ack_pending_num; - }; - - MigrationIncomingState *migration_incoming_get_current(void); -@@ -421,6 +428,12 @@ struct MigrationState { - - /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */ - JSONWriter *vmdesc; -+ -+ /* -+ * Indicates whether an ACK from the destination that it's OK to do -+ * switchover has been received. -+ */ -+ bool switchover_acked; - }; - - void migrate_set_state(int *state, int old_state, int new_state); -@@ -461,6 +474,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, - void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, - char *block_name); - void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); -+int migrate_send_rp_switchover_ack(MigrationIncomingState *mis); - - void dirty_bitmap_mig_before_vm_start(void); - void dirty_bitmap_mig_cancel_outgoing(void); -diff --git a/migration/savevm.c b/migration/savevm.c -index 211eff3a8b..aff70e6263 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -2358,6 +2358,21 @@ static int loadvm_process_command(QEMUFile *f) - error_report("CMD_OPEN_RETURN_PATH failed"); - return -1; - } -+ -+ /* -+ * Switchover ack is enabled but no device uses it, so send an ACK to -+ * source that it's OK to switchover. Do it here, after return path has -+ * been created. 
-+ */ -+ if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) { -+ int ret = migrate_send_rp_switchover_ack(mis); -+ if (ret) { -+ error_report( -+ "Could not send switchover ack RP MSG, err %d (%s)", ret, -+ strerror(-ret)); -+ return ret; -+ } -+ } - break; - - case MIG_CMD_PING: -@@ -2584,6 +2599,23 @@ static int qemu_loadvm_state_header(QEMUFile *f) - return 0; - } - -+static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis) -+{ -+ SaveStateEntry *se; -+ -+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { -+ if (!se->ops || !se->ops->switchover_ack_needed) { -+ continue; -+ } -+ -+ if (se->ops->switchover_ack_needed(se->opaque)) { -+ mis->switchover_ack_pending_num++; -+ } -+ } -+ -+ trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num); -+} -+ - static int qemu_loadvm_state_setup(QEMUFile *f) - { - SaveStateEntry *se; -@@ -2787,6 +2819,10 @@ int qemu_loadvm_state(QEMUFile *f) - return -EINVAL; - } - -+ if (migrate_switchover_ack()) { -+ qemu_loadvm_state_switchover_ack_needed(mis); -+ } -+ - cpu_synchronize_all_pre_loadvm(); - - ret = qemu_loadvm_state_main(f, mis); -@@ -2860,6 +2896,24 @@ int qemu_load_device_state(QEMUFile *f) - return 0; - } - -+int qemu_loadvm_approve_switchover(void) -+{ -+ MigrationIncomingState *mis = migration_incoming_get_current(); -+ -+ if (!mis->switchover_ack_pending_num) { -+ return -EINVAL; -+ } -+ -+ mis->switchover_ack_pending_num--; -+ trace_loadvm_approve_switchover(mis->switchover_ack_pending_num); -+ -+ if (mis->switchover_ack_pending_num) { -+ return 0; -+ } -+ -+ return migrate_send_rp_switchover_ack(mis); -+} -+ - bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - bool has_devices, strList *devices, Error **errp) - { -diff --git a/migration/savevm.h b/migration/savevm.h -index fb636735f0..e894bbc143 100644 ---- a/migration/savevm.h -+++ b/migration/savevm.h -@@ -65,6 +65,7 @@ int qemu_loadvm_state(QEMUFile *f); - void 
qemu_loadvm_state_cleanup(void); - int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); - int qemu_load_device_state(QEMUFile *f); -+int qemu_loadvm_approve_switchover(void); - int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, - bool in_postcopy, bool inactivate_disks); - -diff --git a/migration/trace-events b/migration/trace-events -index 92161eeac5..cda807d271 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -7,6 +7,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u" - qemu_loadvm_state_post_main(int ret) "%d" - qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" - qemu_savevm_send_packaged(void) "" -+loadvm_state_switchover_ack_needed(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" - loadvm_state_setup(void) "" - loadvm_state_cleanup(void) "" - loadvm_handle_cmd_packaged(unsigned int length) "%u" -@@ -23,6 +24,7 @@ loadvm_postcopy_ram_handle_discard_end(void) "" - loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud" - loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d" - loadvm_process_command_ping(uint32_t val) "0x%x" -+loadvm_approve_switchover(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" - postcopy_ram_listen_thread_exit(void) "" - postcopy_ram_listen_thread_start(void) "" - qemu_savevm_send_postcopy_advise(void) "" -@@ -180,6 +182,7 @@ source_return_path_thread_loop_top(void) "" - source_return_path_thread_pong(uint32_t val) "0x%x" - source_return_path_thread_shut(uint32_t val) "0x%x" - source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32 -+source_return_path_thread_switchover_acked(void) "" - migration_thread_low_pending(uint64_t pending) "%" PRIu64 - migrate_transferred(uint64_t tranferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" 
PRIu64 " max_size %" PRId64 - process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch b/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch deleted file mode 100644 index f873f3f..0000000 --- a/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch +++ /dev/null @@ -1,431 +0,0 @@ -From eaccfc91b34f93dcaf597e6b39f78741da618ff3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 08/37] migration: Make all functions check have the same - format -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/28] 774df2a81502d3eab5d5b8f64fa9b69f8be43669 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8f9c532756c5 -Author: Juan Quintela -Date: Wed Mar 1 23:11:08 2023 +0100 - - migration: Make all functions check have the same format - - Signed-off-by: Juan Quintela - Reviewed-by: Vladimir Sementsov-Ogievskiy - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 153 +++++++++++--------------------------------- - 1 file changed, 39 insertions(+), 114 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index e51d667e14..bcfe244fa9 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,27 +33,21 @@ - - bool migrate_auto_converge(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; - } - - bool migrate_background_snapshot(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return 
s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - - bool migrate_block(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; - } -@@ -61,95 +55,76 @@ bool migrate_block(void) - bool migrate_colo(void) - { - MigrationState *s = migrate_get_current(); -+ - return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - - bool migrate_compress(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; - } - - bool migrate_dirty_bitmaps(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - - bool migrate_events(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; - } - - bool migrate_ignore_shared(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; - } - - bool migrate_late_block_activate(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - - bool migrate_multifd(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - - bool migrate_pause_before_switchover(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; - } - - bool migrate_postcopy_blocktime(void) - { -- MigrationState *s; -- -- s = 
migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; - } - - bool migrate_postcopy_preempt(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; - } - - bool migrate_postcopy_ram(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } -@@ -163,54 +138,42 @@ bool migrate_rdma_pin_all(void) - - bool migrate_release_ram(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - - bool migrate_return_path(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - - bool migrate_validate_uuid(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - - bool migrate_xbzrle(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; - } - - bool migrate_zero_blocks(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } - - bool migrate_zero_copy_send(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -@@ -224,9 +187,7 @@ bool migrate_postcopy(void) - - bool migrate_tls(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ 
MigrationState *s = migrate_get_current(); - - return s->parameters.tls_creds && *s->parameters.tls_creds; - } -@@ -491,126 +452,98 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - - bool migrate_block_incremental(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.block_incremental; - } - - uint32_t migrate_checkpoint_delay(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.x_checkpoint_delay; - } - - int migrate_compress_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_level; - } - - int migrate_compress_threads(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_threads; - } - - int migrate_compress_wait_thread(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_wait_thread; - } - - uint8_t migrate_cpu_throttle_increment(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_increment; - } - - uint8_t migrate_cpu_throttle_initial(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_initial; - } - - bool migrate_cpu_throttle_tailslow(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_tailslow; - } - - int migrate_decompress_threads(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.decompress_threads; - } - - uint8_t 
migrate_max_cpu_throttle(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_cpu_throttle; - } - - uint64_t migrate_max_bandwidth(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_bandwidth; - } - - int64_t migrate_max_postcopy_bandwidth(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_postcopy_bandwidth; - } - - int migrate_multifd_channels(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_channels; - } - - MultiFDCompression migrate_multifd_compression(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); - return s->parameters.multifd_compression; -@@ -618,36 +551,28 @@ MultiFDCompression migrate_multifd_compression(void) - - int migrate_multifd_zlib_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_zlib_level; - } - - int migrate_multifd_zstd_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_zstd_level; - } - - uint8_t migrate_throttle_trigger_threshold(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.throttle_trigger_threshold; - } - - uint64_t migrate_xbzrle_cache_size(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.xbzrle_cache_size; - } --- -2.39.3 - diff --git 
a/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch deleted file mode 100644 index ad1de7b..0000000 --- a/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 886b511e0a225b1c4428c646534d7bcc65bd9e2a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 18:02:34 +0200 -Subject: [PATCH 14/56] migration: Make dirty_sync_count atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [13/50] ef3ae8cdd960e944ba9e73a53d54c9a5a55bb1ce (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 536b5a4e56ec67c958f46e7d46cbd5ac34e5a239) -Signed-off-by: Peter Xu ---- - migration/migration.c | 3 ++- - migration/ram.c | 13 +++++++------ - migration/ram.h | 2 +- - 3 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8f2847d298..8fca751050 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1148,7 +1148,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->normal = stat64_get(&ram_counters.normal); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; -- info->ram->dirty_sync_count = ram_counters.dirty_sync_count; -+ info->ram->dirty_sync_count = -+ stat64_get(&ram_counters.dirty_sync_count); - info->ram->dirty_sync_missed_zero_copy = - stat64_get(&ram_counters.dirty_sync_missed_zero_copy); - info->ram->postcopy_requests = ram_counters.postcopy_requests; -diff --git a/migration/ram.c b/migration/ram.c -index b1722b6071..3c13136559 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -764,7 
+764,7 @@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) - /* We don't care if this fails to allocate a new cache page - * as long as it updated an old one */ - cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, -- ram_counters.dirty_sync_count); -+ stat64_get(&ram_counters.dirty_sync_count)); - } - - #define ENCODING_FLAG_XBZRLE 0x1 -@@ -790,13 +790,13 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, - int encoded_len = 0, bytes_xbzrle; - uint8_t *prev_cached_page; - QEMUFile *file = pss->pss_channel; -+ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); - -- if (!cache_is_cached(XBZRLE.cache, current_addr, -- ram_counters.dirty_sync_count)) { -+ if (!cache_is_cached(XBZRLE.cache, current_addr, generation)) { - xbzrle_counters.cache_miss++; - if (!rs->last_stage) { - if (cache_insert(XBZRLE.cache, current_addr, *current_data, -- ram_counters.dirty_sync_count) == -1) { -+ generation) == -1) { - return -1; - } else { - /* update *current_data when the page has been -@@ -1209,7 +1209,7 @@ static void migration_bitmap_sync(RAMState *rs) - RAMBlock *block; - int64_t end_time; - -- ram_counters.dirty_sync_count++; -+ stat64_add(&ram_counters.dirty_sync_count, 1); - - if (!rs->time_last_bitmap_sync) { - rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -@@ -1246,7 +1246,8 @@ static void migration_bitmap_sync(RAMState *rs) - rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - if (migrate_use_events()) { -- qapi_event_send_migration_pass(ram_counters.dirty_sync_count); -+ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); -+ qapi_event_send_migration_pass(generation); - } - } - -diff --git a/migration/ram.h b/migration/ram.h -index bb52632424..8c0d07c43a 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -42,7 +42,7 @@ - */ - typedef struct { - int64_t dirty_pages_rate; -- int64_t dirty_sync_count; -+ Stat64 dirty_sync_count; - Stat64 
dirty_sync_missed_zero_copy; - Stat64 downtime_bytes; - Stat64 duplicate; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch deleted file mode 100644 index b7b0f60..0000000 --- a/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch +++ /dev/null @@ -1,92 +0,0 @@ -From e9ff20d7f7e6c2354f3696e8bca265e535eeb801 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:33:56 +0200 -Subject: [PATCH 11/56] migration: Make dirty_sync_missed_zero_copy atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [10/50] 041230abb087db0e7ffae02b4f85772490b805a0 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 4291823694fd8507831d26e2558d9cd0030841f7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/multifd.c | 2 +- - migration/ram.c | 5 ----- - migration/ram.h | 4 +--- - 4 files changed, 3 insertions(+), 10 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index ca68808b5c..645fb4b3c5 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1150,7 +1150,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; - info->ram->dirty_sync_missed_zero_copy = -- ram_counters.dirty_sync_missed_zero_copy; -+ stat64_get(&ram_counters.dirty_sync_missed_zero_copy); - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = page_size; - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); -diff --git 
a/migration/multifd.c b/migration/multifd.c -index 1c992abf53..903df2117b 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -576,7 +576,7 @@ static int multifd_zero_copy_flush(QIOChannel *c) - return -1; - } - if (ret == 1) { -- dirty_sync_missed_zero_copy(); -+ stat64_add(&ram_counters.dirty_sync_missed_zero_copy, 1); - } - - return ret; -diff --git a/migration/ram.c b/migration/ram.c -index 71320ed27a..93e0a48af4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -472,11 +472,6 @@ void ram_transferred_add(uint64_t bytes) - stat64_add(&ram_counters.transferred, bytes); - } - --void dirty_sync_missed_zero_copy(void) --{ -- ram_counters.dirty_sync_missed_zero_copy++; --} -- - struct MigrationOps { - int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss); - }; -diff --git a/migration/ram.h b/migration/ram.h -index ed70391317..2170c55e67 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -43,7 +43,7 @@ - typedef struct { - int64_t dirty_pages_rate; - int64_t dirty_sync_count; -- uint64_t dirty_sync_missed_zero_copy; -+ Stat64 dirty_sync_missed_zero_copy; - uint64_t downtime_bytes; - Stat64 duplicate; - Stat64 multifd_bytes; -@@ -114,6 +114,4 @@ void ram_write_tracking_prepare(void); - int ram_write_tracking_start(void); - void ram_write_tracking_stop(void); - --void dirty_sync_missed_zero_copy(void); -- - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch b/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch deleted file mode 100644 index 9b206bc..0000000 --- a/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 4c6af064277b5445b31db4a598e1c4402ba56452 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:38:11 +0200 -Subject: [PATCH 13/56] migration: Make downtime_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty 
failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [12/50] ebfc16aae8bc4a8c1fec431780a062950e6f50c4 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 296a4ac2aa63038b6b702f2ee8f0f93ae26727ae) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 3a68d93d69..8f2847d298 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1156,7 +1156,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; - info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); -- info->ram->downtime_bytes = ram_counters.downtime_bytes; -+ info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - - if (migrate_use_xbzrle()) { -diff --git a/migration/ram.c b/migration/ram.c -index 0b4693215e..b1722b6071 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -467,7 +467,7 @@ void ram_transferred_add(uint64_t bytes) - } else if (migration_in_postcopy()) { - stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { -- ram_counters.downtime_bytes += bytes; -+ stat64_add(&ram_counters.downtime_bytes, bytes); - } - stat64_add(&ram_counters.transferred, bytes); - } -diff --git a/migration/ram.h b/migration/ram.h -index a766b895fa..bb52632424 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -44,7 +44,7 @@ typedef struct { - int64_t dirty_pages_rate; - int64_t dirty_sync_count; - Stat64 dirty_sync_missed_zero_copy; -- uint64_t downtime_bytes; -+ Stat64 downtime_bytes; - Stat64 duplicate; - Stat64 
multifd_bytes; - Stat64 normal; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch b/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch deleted file mode 100644 index b315fdc..0000000 --- a/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch +++ /dev/null @@ -1,99 +0,0 @@ -From bfcc4bc8f60b541d545f1ea27b1ff156d8092d33 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 23 Nov 2022 20:36:56 +0100 -Subject: [PATCH 10/56] migration: Make multifd_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [9/50] c2bc6b173770a0ea81c3f9d850c583c651647070 (peterx/qemu-kvm) - -In the spirit of: - -commit 394d323bc3451e4d07f13341cb8817fac8dfbadd -Author: Peter Xu -Date: Tue Oct 11 17:55:51 2022 -0400 - - migration: Use atomic ops properly for page accountings - -Reviewed-by: David Edmondson -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit cf671116facf4e29d91fce9c9ffb535385ffac81) -Signed-off-by: Peter Xu ---- - migration/migration.c | 4 ++-- - migration/multifd.c | 4 ++-- - migration/ram.h | 2 +- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index a91704d35c..ca68808b5c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1153,7 +1153,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - ram_counters.dirty_sync_missed_zero_copy; - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = page_size; -- info->ram->multifd_bytes = ram_counters.multifd_bytes; -+ info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; - 
info->ram->precopy_bytes = ram_counters.precopy_bytes; - info->ram->downtime_bytes = ram_counters.downtime_bytes; -@@ -3780,7 +3780,7 @@ static MigThrError migration_detect_error(MigrationState *s) - static uint64_t migration_total_bytes(MigrationState *s) - { - return qemu_file_total_transferred(s->to_dst_file) + -- ram_counters.multifd_bytes; -+ stat64_get(&ram_counters.multifd_bytes); - } - - static void migration_calculate_complete(MigrationState *s) -diff --git a/migration/multifd.c b/migration/multifd.c -index 6ef3a27938..1c992abf53 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -432,9 +432,9 @@ static int multifd_send_pages(QEMUFile *f) - p->pages = pages; - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); -- ram_counters.multifd_bytes += transferred; - qemu_mutex_unlock(&p->mutex); - stat64_add(&ram_counters.transferred, transferred); -+ stat64_add(&ram_counters.multifd_bytes, transferred); - qemu_sem_post(&p->sem); - - return 1; -@@ -627,9 +627,9 @@ int multifd_send_sync_main(QEMUFile *f) - p->flags |= MULTIFD_FLAG_SYNC; - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); -- ram_counters.multifd_bytes += p->packet_len; - qemu_mutex_unlock(&p->mutex); - stat64_add(&ram_counters.transferred, p->packet_len); -+ stat64_add(&ram_counters.multifd_bytes, p->packet_len); - qemu_sem_post(&p->sem); - } - for (i = 0; i < migrate_multifd_channels(); i++) { -diff --git a/migration/ram.h b/migration/ram.h -index 7c026b5242..ed70391317 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -46,7 +46,7 @@ typedef struct { - uint64_t dirty_sync_missed_zero_copy; - uint64_t downtime_bytes; - Stat64 duplicate; -- uint64_t multifd_bytes; -+ Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; - int64_t postcopy_requests; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch b/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch 
deleted file mode 100644 index 894419a..0000000 --- a/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch +++ /dev/null @@ -1,69 +0,0 @@ -From e6ff4536a5e5f5bbfda370ecb525d0e066c3ab1c Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 18:04:59 +0200 -Subject: [PATCH 15/56] migration: Make postcopy_requests atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [14/50] d15c6052b77e7ded7bf34c66caa11bf86b75f2e8 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 3c764f9b2bc3e5eb5ed93ab45c2de6d599fef00f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 3 ++- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8fca751050..39501a0ed8 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1152,7 +1152,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - stat64_get(&ram_counters.dirty_sync_count); - info->ram->dirty_sync_missed_zero_copy = - stat64_get(&ram_counters.dirty_sync_missed_zero_copy); -- info->ram->postcopy_requests = ram_counters.postcopy_requests; -+ info->ram->postcopy_requests = -+ stat64_get(&ram_counters.postcopy_requests); - info->ram->page_size = page_size; - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; -diff --git a/migration/ram.c b/migration/ram.c -index 3c13136559..fe69ecaef4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2169,7 +2169,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) - RAMBlock *ramblock; - RAMState *rs = 
ram_state; - -- ram_counters.postcopy_requests++; -+ stat64_add(&ram_counters.postcopy_requests, 1); - RCU_READ_LOCK_GUARD(); - - if (!rbname) { -diff --git a/migration/ram.h b/migration/ram.h -index 8c0d07c43a..afa68521d7 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -49,7 +49,7 @@ typedef struct { - Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; -- int64_t postcopy_requests; -+ Stat64 postcopy_requests; - Stat64 precopy_bytes; - int64_t remaining; - Stat64 transferred; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch b/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch deleted file mode 100644 index 8e6c177..0000000 --- a/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 7e4d4316855f7f6556364eb16828f925b61c80d4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:36:48 +0200 -Subject: [PATCH 12/56] migration: Make precopy_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [11/50] 23bec49b4b8f4d23c2192b401416139e3ca13626 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit b013b5d1f32ef88457e66c7ce576f6475238f97f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 645fb4b3c5..3a68d93d69 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1155,7 +1155,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->page_size = page_size; - info->ram->multifd_bytes = 
stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; -- info->ram->precopy_bytes = ram_counters.precopy_bytes; -+ info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); - info->ram->downtime_bytes = ram_counters.downtime_bytes; - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - -diff --git a/migration/ram.c b/migration/ram.c -index 93e0a48af4..0b4693215e 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -463,7 +463,7 @@ RAMStats ram_counters; - void ram_transferred_add(uint64_t bytes) - { - if (runstate_is_running()) { -- ram_counters.precopy_bytes += bytes; -+ stat64_add(&ram_counters.precopy_bytes, bytes); - } else if (migration_in_postcopy()) { - stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { -diff --git a/migration/ram.h b/migration/ram.h -index 2170c55e67..a766b895fa 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -50,7 +50,7 @@ typedef struct { - Stat64 normal; - Stat64 postcopy_bytes; - int64_t postcopy_requests; -- uint64_t precopy_bytes; -+ Stat64 precopy_bytes; - int64_t remaining; - Stat64 transferred; - } RAMStats; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch b/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch deleted file mode 100644 index 0679e89..0000000 --- a/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch +++ /dev/null @@ -1,270 +0,0 @@ -From 5a87058eea6ee56f37fb454486c35baaf693d691 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 22 Feb 2023 15:56:45 +0100 -Subject: [PATCH 08/56] migration: Merge ram_counters and ram_atomic_counters -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 
-RH-Commit: [7/50] 90e395de66aa32b886cf151f7996a680190471f5 (peterx/qemu-kvm) - -Using MgrationStats as type for ram_counters mean that we didn't have -to re-declare each value in another struct. The need of atomic -counters have make us to create MigrationAtomicStats for this atomic -counters. - -Create RAMStats type which is a merge of MigrationStats and -MigrationAtomicStats removing unused members. - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu - ---- - -Fix typos found by David Edmondson - -(cherry picked from commit abce5fa16d126ed085ccf8a5b3fe61a1efa20994) -Signed-off-by: Peter Xu ---- - migration/migration.c | 8 ++++---- - migration/multifd.c | 4 ++-- - migration/ram.c | 39 ++++++++++++++++----------------------- - migration/ram.h | 28 +++++++++++++++------------- - 4 files changed, 37 insertions(+), 42 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 99f86bd6c2..a91704d35c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1140,12 +1140,12 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - size_t page_size = qemu_target_page_size(); - - info->ram = g_malloc0(sizeof(*info->ram)); -- info->ram->transferred = stat64_get(&ram_atomic_counters.transferred); -+ info->ram->transferred = stat64_get(&ram_counters.transferred); - info->ram->total = ram_bytes_total(); -- info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate); -+ info->ram->duplicate = stat64_get(&ram_counters.duplicate); - /* legacy value. 
It is not used anymore */ - info->ram->skipped = 0; -- info->ram->normal = stat64_get(&ram_atomic_counters.normal); -+ info->ram->normal = stat64_get(&ram_counters.normal); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; -@@ -1157,7 +1157,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->pages_per_second = s->pages_per_second; - info->ram->precopy_bytes = ram_counters.precopy_bytes; - info->ram->downtime_bytes = ram_counters.downtime_bytes; -- info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes); -+ info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - - if (migrate_use_xbzrle()) { - info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); -diff --git a/migration/multifd.c b/migration/multifd.c -index cbc0dfe39b..01fab01a92 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -433,7 +433,7 @@ static int multifd_send_pages(QEMUFile *f) - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); - ram_counters.multifd_bytes += transferred; -- stat64_add(&ram_atomic_counters.transferred, transferred); -+ stat64_add(&ram_counters.transferred, transferred); - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - -@@ -628,7 +628,7 @@ int multifd_send_sync_main(QEMUFile *f) - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); - ram_counters.multifd_bytes += p->packet_len; -- stat64_add(&ram_atomic_counters.transferred, p->packet_len); -+ stat64_add(&ram_counters.transferred, p->packet_len); - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - } -diff --git a/migration/ram.c b/migration/ram.c -index 0e68099bf9..71320ed27a 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -458,25 +458,18 @@ uint64_t ram_bytes_remaining(void) - 0; - } - --/* -- * NOTE: not all stats in ram_counters are used in 
reality. See comments -- * for struct MigrationAtomicStats. The ultimate result of ram migration -- * counters will be a merged version with both ram_counters and the atomic -- * fields in ram_atomic_counters. -- */ --MigrationStats ram_counters; --MigrationAtomicStats ram_atomic_counters; -+RAMStats ram_counters; - - void ram_transferred_add(uint64_t bytes) - { - if (runstate_is_running()) { - ram_counters.precopy_bytes += bytes; - } else if (migration_in_postcopy()) { -- stat64_add(&ram_atomic_counters.postcopy_bytes, bytes); -+ stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { - ram_counters.downtime_bytes += bytes; - } -- stat64_add(&ram_atomic_counters.transferred, bytes); -+ stat64_add(&ram_counters.transferred, bytes); - } - - void dirty_sync_missed_zero_copy(void) -@@ -756,7 +749,7 @@ void mig_throttle_counter_reset(void) - - rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - rs->num_dirty_pages_period = 0; -- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); -+ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - - /** -@@ -1130,8 +1123,8 @@ uint64_t ram_pagesize_summary(void) - - uint64_t ram_get_total_transferred_pages(void) - { -- return stat64_get(&ram_atomic_counters.normal) + -- stat64_get(&ram_atomic_counters.duplicate) + -+ return stat64_get(&ram_counters.normal) + -+ stat64_get(&ram_counters.duplicate) + - compression_counters.pages + xbzrle_counters.pages; - } - -@@ -1192,7 +1185,7 @@ static void migration_trigger_throttle(RAMState *rs) - MigrationState *s = migrate_get_current(); - uint64_t threshold = s->parameters.throttle_trigger_threshold; - uint64_t bytes_xfer_period = -- stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev; -+ stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; - uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; - uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; - -@@ -1255,7 +1248,7 @@ static 
void migration_bitmap_sync(RAMState *rs) - /* reset period counters */ - rs->time_last_bitmap_sync = end_time; - rs->num_dirty_pages_period = 0; -- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); -+ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - if (migrate_use_events()) { - qapi_event_send_migration_pass(ram_counters.dirty_sync_count); -@@ -1331,7 +1324,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, - int len = save_zero_page_to_file(pss, f, block, offset); - - if (len) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - ram_transferred_add(len); - return 1; - } -@@ -1368,9 +1361,9 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - } - - if (bytes_xmit > 0) { -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - } else if (bytes_xmit == 0) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - } - - return true; -@@ -1402,7 +1395,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, - qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); - } - ram_transferred_add(TARGET_PAGE_SIZE); -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - return 1; - } - -@@ -1458,7 +1451,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, - if (multifd_queue_page(file, block, offset) < 0) { - return -1; - } -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - - return 1; - } -@@ -1497,7 +1490,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) - ram_transferred_add(bytes_xmit); - - if (param->zero_page) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - return; - } - -@@ -2632,9 +2625,9 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - uint64_t pages = size / TARGET_PAGE_SIZE; 
- - if (zero) { -- stat64_add(&ram_atomic_counters.duplicate, pages); -+ stat64_add(&ram_counters.duplicate, pages); - } else { -- stat64_add(&ram_atomic_counters.normal, pages); -+ stat64_add(&ram_counters.normal, pages); - ram_transferred_add(size); - qemu_file_credit_transfer(f, size); - } -diff --git a/migration/ram.h b/migration/ram.h -index 81cbb0947c..7c026b5242 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -35,25 +35,27 @@ - #include "qemu/stats64.h" - - /* -- * These are the migration statistic counters that need to be updated using -- * atomic ops (can be accessed by more than one thread). Here since we -- * cannot modify MigrationStats directly to use Stat64 as it was defined in -- * the QAPI scheme, we define an internal structure to hold them, and we -- * propagate the real values when QMP queries happen. -- * -- * IOW, the corresponding fields within ram_counters on these specific -- * fields will be always zero and not being used at all; they're just -- * placeholders to make it QAPI-compatible. -+ * These are the ram migration statistic counters. It is loosely -+ * based on MigrationStats. We change to Stat64 any counter that -+ * needs to be updated using atomic ops (can be accessed by more than -+ * one thread). 
- */ - typedef struct { -- Stat64 transferred; -+ int64_t dirty_pages_rate; -+ int64_t dirty_sync_count; -+ uint64_t dirty_sync_missed_zero_copy; -+ uint64_t downtime_bytes; - Stat64 duplicate; -+ uint64_t multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; --} MigrationAtomicStats; -+ int64_t postcopy_requests; -+ uint64_t precopy_bytes; -+ int64_t remaining; -+ Stat64 transferred; -+} RAMStats; - --extern MigrationAtomicStats ram_atomic_counters; --extern MigrationStats ram_counters; -+extern RAMStats ram_counters; - extern XBZRLECacheStats xbzrle_counters; - extern CompressionStats compression_counters; - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Minor-control-flow-simplification.patch b/SOURCES/kvm-migration-Minor-control-flow-simplification.patch deleted file mode 100644 index a0dbdd9..0000000 --- a/SOURCES/kvm-migration-Minor-control-flow-simplification.patch +++ /dev/null @@ -1,52 +0,0 @@ -From c3bc974ea4b5186a76daa433209c1209d94dd0b7 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Thu, 20 Apr 2023 09:35:51 -0500 -Subject: [PATCH 2/2] migration: Minor control flow simplification - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [2/2] 5afd8c25d6f14bdb2a380ecc77bc6c2f2a26df87 (ebblake/centos-qemu-kvm) - -No need to declare a temporary variable. 
- -Suggested-by: Juan Quintela -Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better") -Signed-off-by: Eric Blake -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 5d39f44d7ac5c63f53d4d0900ceba9521bc27e49) ---- - migration/migration.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index cb0d42c061..08007cef4e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3436,7 +3436,6 @@ static void migration_completion(MigrationState *s) - ret = global_state_store(); - - if (!ret) { -- bool inactivate = !migrate_colo_enabled(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - trace_migration_completion_vm_stop(ret); - if (ret >= 0) { -@@ -3444,10 +3443,10 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -- s->block_inactive = inactivate; -+ s->block_inactive = !migrate_colo_enabled(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, -- inactivate); -+ s->block_inactive); - } - } - qemu_mutex_unlock_iothread(); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch b/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch deleted file mode 100644 index 24dcb16..0000000 --- a/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 1f5232d611ecaaf61bcac151e7d90b8b452ac161 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:17:23 +0100 -Subject: [PATCH 43/56] migration: Move migrate_announce_params() to option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás 
-RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [42/50] 541be7adc7f81c269058485aef5b14e787b2efe6 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas - ---- - -Fix extra whitespace (fabiano) - -(cherry picked from commit 2682c4eea72c621dfd0fb0151cbd758e81d1bdff) -Signed-off-by: Peter Xu ---- - migration/migration.c | 14 -------------- - migration/options.c | 17 +++++++++++++++++ - 2 files changed, 17 insertions(+), 14 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 724e841eb9..f27ce30be2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -954,20 +954,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - return params; - } - --AnnounceParameters *migrate_announce_params(void) --{ -- static AnnounceParameters ap; -- -- MigrationState *s = migrate_get_current(); -- -- ap.initial = s->parameters.announce_initial; -- ap.max = s->parameters.announce_max; -- ap.rounds = s->parameters.announce_rounds; -- ap.step = s->parameters.announce_step; -- -- return &ap; --} -- - /* - * Return true if we're already in the middle of a migration - * (i.e.
any of the active or setup states) -diff --git a/migration/options.c b/migration/options.c -index 2cb04fbbd1..418aafac64 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -16,6 +16,7 @@ - #include "qapi/qapi-commands-migration.h" - #include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" -+#include "migration/misc.h" - #include "migration.h" - #include "ram.h" - #include "options.h" -@@ -589,3 +590,19 @@ uint64_t migrate_xbzrle_cache_size(void) - - return s->parameters.xbzrle_cache_size; - } -+ -+/* parameters helpers */ -+ -+AnnounceParameters *migrate_announce_params(void) -+{ -+ static AnnounceParameters ap; -+ -+ MigrationState *s = migrate_get_current(); -+ -+ ap.initial = s->parameters.announce_initial; -+ ap.max = s->parameters.announce_max; -+ ap.rounds = s->parameters.announce_rounds; -+ ap.step = s->parameters.announce_step; -+ -+ return &ap; -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch deleted file mode 100644 index 0e33c4c..0000000 --- a/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 9c4f8d869f5bbdd07381f6baad2ed755b07d03f4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:25:44 +0100 -Subject: [PATCH 36/56] migration: Move migrate_cap_set() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [35/50] d0cd6b8e9cf0534a56795d94c3da18622fa10ad7 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit f80196b772ddeeb07d3d80d5c8382cb5d1063fa2) -Signed-off-by: Peter Xu ---- - migration/migration.c | 20
-------------------- - migration/options.c | 21 +++++++++++++++++++++ - migration/options.h | 1 + - 3 files changed, 22 insertions(+), 20 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 369cd91796..880a51210e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1666,26 +1666,6 @@ void migrate_set_state(int *state, int old_state, int new_state) - } - } - --static bool migrate_cap_set(int cap, bool value, Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- bool new_caps[MIGRATION_CAPABILITY__MAX]; -- -- if (migration_is_running(s->state)) { -- error_setg(errp, QERR_MIGRATION_ACTIVE); -- return false; -- } -- -- memcpy(new_caps, s->capabilities, sizeof(new_caps)); -- new_caps[cap] = value; -- -- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -- return false; -- } -- s->capabilities[cap] = value; -- return true; --} -- - static void migrate_set_block_incremental(MigrationState *s, bool value) - { - s->parameters.block_incremental = value; -diff --git a/migration/options.c b/migration/options.c -index 4cbe77e35a..f3b2d6e482 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -14,6 +14,7 @@ - #include "qemu/osdep.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" -+#include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" - #include "migration.h" - #include "ram.h" -@@ -392,6 +393,26 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - return true; - } - -+bool migrate_cap_set(int cap, bool value, Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; -+ -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return false; -+ } -+ -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ new_caps[cap] = value; -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return false; -+ } -+ s->capabilities[cap] = value; -+ return true; -+} 
-+ - MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - { - MigrationCapabilityStatusList *head = NULL, **tail = &head; -diff --git a/migration/options.h b/migration/options.h -index e779f14161..5979e4ff90 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -41,5 +41,6 @@ bool migrate_zero_copy_send(void); - /* capabilities helpers */ - - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); -+bool migrate_cap_set(int cap, bool value, Error **errp); - - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch deleted file mode 100644 index 0d6fa08..0000000 --- a/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch +++ /dev/null @@ -1,458 +0,0 @@ -From 3af7c7aaf7407ec14c19e54d52a2229ce4dbb7c5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:05:53 +0100 -Subject: [PATCH 33/56] migration: Move migrate_caps_check() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [32/50] 12999471063d97fffb2b04c6dcb80083b902f963 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 77608706459bd197e25ac1ef54591b9f8a0b46f8) -Signed-off-by: Peter Xu ---- - migration/migration.c | 190 ----------------------------------------- - migration/options.c | 192 ++++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 4 + - 3 files changed, 196 insertions(+), 190 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f7facecd66..d9e30ca918 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -136,39 
+136,6 @@ enum mig_rp_message_type { - MIG_RP_MSG_MAX - }; - --/* Migration capabilities set */ --struct MigrateCapsSet { -- int size; /* Capability set size */ -- MigrationCapability caps[]; /* Variadic array of capabilities */ --}; --typedef struct MigrateCapsSet MigrateCapsSet; -- --/* Define and initialize MigrateCapsSet */ --#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \ -- MigrateCapsSet _name = { \ -- .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ -- .caps = { __VA_ARGS__ } \ -- } -- --/* Background-snapshot compatibility check list */ --static const --INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, -- MIGRATION_CAPABILITY_POSTCOPY_RAM, -- MIGRATION_CAPABILITY_DIRTY_BITMAPS, -- MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, -- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, -- MIGRATION_CAPABILITY_RETURN_PATH, -- MIGRATION_CAPABILITY_MULTIFD, -- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, -- MIGRATION_CAPABILITY_AUTO_CONVERGE, -- MIGRATION_CAPABILITY_RELEASE_RAM, -- MIGRATION_CAPABILITY_RDMA_PIN_ALL, -- MIGRATION_CAPABILITY_COMPRESS, -- MIGRATION_CAPABILITY_XBZRLE, -- MIGRATION_CAPABILITY_X_COLO, -- MIGRATION_CAPABILITY_VALIDATE_UUID, -- MIGRATION_CAPABILITY_ZERO_COPY_SEND); -- - /* When we add fault tolerance, we could have several - migrations at once. For now we don't need to add - dynamic creation of migration */ -@@ -1235,163 +1202,6 @@ static void fill_source_migration_info(MigrationInfo *info) - info->status = state; - } - --typedef enum WriteTrackingSupport { -- WT_SUPPORT_UNKNOWN = 0, -- WT_SUPPORT_ABSENT, -- WT_SUPPORT_AVAILABLE, -- WT_SUPPORT_COMPATIBLE --} WriteTrackingSupport; -- --static --WriteTrackingSupport migrate_query_write_tracking(void) --{ -- /* Check if kernel supports required UFFD features */ -- if (!ram_write_tracking_available()) { -- return WT_SUPPORT_ABSENT; -- } -- /* -- * Check if current memory configuration is -- * compatible with required UFFD features. 
-- */ -- if (!ram_write_tracking_compatible()) { -- return WT_SUPPORT_AVAILABLE; -- } -- -- return WT_SUPPORT_COMPATIBLE; --} -- --/** -- * @migration_caps_check - check capability compatibility -- * -- * @old_caps: old capability list -- * @new_caps: new capability list -- * @errp: set *errp if the check failed, with reason -- * -- * Returns true if check passed, otherwise false. -- */ --static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) --{ -- MigrationIncomingState *mis = migration_incoming_get_current(); -- --#ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { -- error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -- "block migration"); -- error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -- return false; -- } --#endif -- --#ifndef CONFIG_REPLICATION -- if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { -- error_setg(errp, "QEMU compiled without replication module" -- " can't enable COLO"); -- error_append_hint(errp, "Please enable replication before COLO.\n"); -- return false; -- } --#endif -- -- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -- /* This check is reasonably expensive, so only when it's being -- * set the first time, also it's only the destination that needs -- * special support. 
-- */ -- if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -- runstate_check(RUN_STATE_INMIGRATE) && -- !postcopy_ram_supported_by_host(mis)) { -- /* postcopy_ram_supported_by_host will have emitted a more -- * detailed message -- */ -- error_setg(errp, "Postcopy is not supported"); -- return false; -- } -- -- if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -- error_setg(errp, "Postcopy is not compatible with ignore-shared"); -- return false; -- } -- } -- -- if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -- WriteTrackingSupport wt_support; -- int idx; -- /* -- * Check if 'background-snapshot' capability is supported by -- * host kernel and compatible with guest memory configuration. -- */ -- wt_support = migrate_query_write_tracking(); -- if (wt_support < WT_SUPPORT_AVAILABLE) { -- error_setg(errp, "Background-snapshot is not supported by host kernel"); -- return false; -- } -- if (wt_support < WT_SUPPORT_COMPATIBLE) { -- error_setg(errp, "Background-snapshot is not compatible " -- "with guest memory configuration"); -- return false; -- } -- -- /* -- * Check if there are any migration capabilities -- * incompatible with 'background-snapshot'. 
-- */ -- for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { -- int incomp_cap = check_caps_background_snapshot.caps[idx]; -- if (new_caps[incomp_cap]) { -- error_setg(errp, -- "Background-snapshot is not compatible with %s", -- MigrationCapability_str(incomp_cap)); -- return false; -- } -- } -- } -- --#ifdef CONFIG_LINUX -- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -- (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -- new_caps[MIGRATION_CAPABILITY_COMPRESS] || -- new_caps[MIGRATION_CAPABILITY_XBZRLE] || -- migrate_multifd_compression() || -- migrate_use_tls())) { -- error_setg(errp, -- "Zero copy only available for non-compressed non-TLS multifd migration"); -- return false; -- } --#else -- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -- error_setg(errp, -- "Zero copy currently only available on Linux"); -- return false; -- } --#endif -- -- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -- if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -- error_setg(errp, "Postcopy preempt requires postcopy-ram"); -- return false; -- } -- -- /* -- * Preempt mode requires urgent pages to be sent in separate -- * channel, OTOH compression logic will disorder all pages into -- * different compression channels, which is not compatible with the -- * preempt assumptions on channel assignments. 
-- */ -- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -- error_setg(errp, "Postcopy preempt not compatible with compress"); -- return false; -- } -- } -- -- if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -- error_setg(errp, "Multifd is not compatible with compress"); -- return false; -- } -- } -- -- return true; --} -- - static void fill_destination_migration_info(MigrationInfo *info) - { - MigrationIncomingState *mis = migration_incoming_get_current(); -diff --git a/migration/options.c b/migration/options.c -index 9c9b8e5863..367c930f46 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,7 +12,10 @@ - */ - - #include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "sysemu/runstate.h" - #include "migration.h" -+#include "ram.h" - #include "options.h" - - bool migrate_auto_converge(void) -@@ -198,3 +201,192 @@ bool migrate_zero_copy_send(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -+typedef enum WriteTrackingSupport { -+ WT_SUPPORT_UNKNOWN = 0, -+ WT_SUPPORT_ABSENT, -+ WT_SUPPORT_AVAILABLE, -+ WT_SUPPORT_COMPATIBLE -+} WriteTrackingSupport; -+ -+static -+WriteTrackingSupport migrate_query_write_tracking(void) -+{ -+ /* Check if kernel supports required UFFD features */ -+ if (!ram_write_tracking_available()) { -+ return WT_SUPPORT_ABSENT; -+ } -+ /* -+ * Check if current memory configuration is -+ * compatible with required UFFD features. -+ */ -+ if (!ram_write_tracking_compatible()) { -+ return WT_SUPPORT_AVAILABLE; -+ } -+ -+ return WT_SUPPORT_COMPATIBLE; -+} -+ -+/* Migration capabilities set */ -+struct MigrateCapsSet { -+ int size; /* Capability set size */ -+ MigrationCapability caps[]; /* Variadic array of capabilities */ -+}; -+typedef struct MigrateCapsSet MigrateCapsSet; -+ -+/* Define and initialize MigrateCapsSet */ -+#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) 
\ -+ MigrateCapsSet _name = { \ -+ .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ -+ .caps = { __VA_ARGS__ } \ -+ } -+ -+/* Background-snapshot compatibility check list */ -+static const -+INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, -+ MIGRATION_CAPABILITY_POSTCOPY_RAM, -+ MIGRATION_CAPABILITY_DIRTY_BITMAPS, -+ MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, -+ MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, -+ MIGRATION_CAPABILITY_RETURN_PATH, -+ MIGRATION_CAPABILITY_MULTIFD, -+ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, -+ MIGRATION_CAPABILITY_AUTO_CONVERGE, -+ MIGRATION_CAPABILITY_RELEASE_RAM, -+ MIGRATION_CAPABILITY_RDMA_PIN_ALL, -+ MIGRATION_CAPABILITY_COMPRESS, -+ MIGRATION_CAPABILITY_XBZRLE, -+ MIGRATION_CAPABILITY_X_COLO, -+ MIGRATION_CAPABILITY_VALIDATE_UUID, -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND); -+ -+/** -+ * @migration_caps_check - check capability compatibility -+ * -+ * @old_caps: old capability list -+ * @new_caps: new capability list -+ * @errp: set *errp if the check failed, with reason -+ * -+ * Returns true if check passed, otherwise false. 
-+ */ -+bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) -+{ -+ MigrationIncomingState *mis = migration_incoming_get_current(); -+ -+#ifndef CONFIG_LIVE_BLOCK_MIGRATION -+ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { -+ error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -+ "block migration"); -+ error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -+ return false; -+ } -+#endif -+ -+#ifndef CONFIG_REPLICATION -+ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { -+ error_setg(errp, "QEMU compiled without replication module" -+ " can't enable COLO"); -+ error_append_hint(errp, "Please enable replication before COLO.\n"); -+ return false; -+ } -+#endif -+ -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ /* This check is reasonably expensive, so only when it's being -+ * set the first time, also it's only the destination that needs -+ * special support. -+ */ -+ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -+ runstate_check(RUN_STATE_INMIGRATE) && -+ !postcopy_ram_supported_by_host(mis)) { -+ /* postcopy_ram_supported_by_host will have emitted a more -+ * detailed message -+ */ -+ error_setg(errp, "Postcopy is not supported"); -+ return false; -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -+ error_setg(errp, "Postcopy is not compatible with ignore-shared"); -+ return false; -+ } -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -+ WriteTrackingSupport wt_support; -+ int idx; -+ /* -+ * Check if 'background-snapshot' capability is supported by -+ * host kernel and compatible with guest memory configuration. 
-+ */ -+ wt_support = migrate_query_write_tracking(); -+ if (wt_support < WT_SUPPORT_AVAILABLE) { -+ error_setg(errp, "Background-snapshot is not supported by host kernel"); -+ return false; -+ } -+ if (wt_support < WT_SUPPORT_COMPATIBLE) { -+ error_setg(errp, "Background-snapshot is not compatible " -+ "with guest memory configuration"); -+ return false; -+ } -+ -+ /* -+ * Check if there are any migration capabilities -+ * incompatible with 'background-snapshot'. -+ */ -+ for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { -+ int incomp_cap = check_caps_background_snapshot.caps[idx]; -+ if (new_caps[incomp_cap]) { -+ error_setg(errp, -+ "Background-snapshot is not compatible with %s", -+ MigrationCapability_str(incomp_cap)); -+ return false; -+ } -+ } -+ } -+ -+#ifdef CONFIG_LINUX -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -+ (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -+ new_caps[MIGRATION_CAPABILITY_COMPRESS] || -+ new_caps[MIGRATION_CAPABILITY_XBZRLE] || -+ migrate_multifd_compression() || -+ migrate_use_tls())) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#else -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -+ error_setg(errp, -+ "Zero copy currently only available on Linux"); -+ return false; -+ } -+#endif -+ -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -+ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ error_setg(errp, "Postcopy preempt requires postcopy-ram"); -+ return false; -+ } -+ -+ /* -+ * Preempt mode requires urgent pages to be sent in separate -+ * channel, OTOH compression logic will disorder all pages into -+ * different compression channels, which is not compatible with the -+ * preempt assumptions on channel assignments. 
-+ */ -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -+ error_setg(errp, "Postcopy preempt not compatible with compress"); -+ return false; -+ } -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -+ error_setg(errp, "Multifd is not compatible with compress"); -+ return false; -+ } -+ } -+ -+ return true; -+} -diff --git a/migration/options.h b/migration/options.h -index 25c002b37a..e779f14161 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -38,4 +38,8 @@ bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -+/* capabilities helpers */ -+ -+bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); -+ - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch deleted file mode 100644 index 47c6f83..0000000 --- a/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 13da9060fa2dfc666cd6f4b9bc85b7cee0fef45e Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:00:16 +0100 -Subject: [PATCH 24/56] migration: Move migrate_colo_enabled() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [23/50] 4809b1091edee38bd222af41b6313133705785c7 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_colo() to be -consistent with all other capabilities. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5e8046445575dc5879e63c5d07af893d174813d0) -Signed-off-by: Peter Xu ---- - migration/migration.c | 16 +++++----------- - migration/migration.h | 1 - - migration/options.c | 6 ++++++ - migration/options.h | 1 + - 4 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 66ea55be06..59ee0ef82b 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2411,7 +2411,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - } - - if (blk || blk_inc) { -- if (migrate_colo_enabled()) { -+ if (migrate_colo()) { - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -@@ -3304,7 +3304,7 @@ static void migration_completion(MigrationState *s) - * have done so in order to remember to reactivate - * them if migration fails or is cancelled. - */ -- s->block_inactive = !migrate_colo_enabled(); -+ s->block_inactive = !migrate_colo(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - s->block_inactive); -@@ -3357,7 +3357,7 @@ static void migration_completion(MigrationState *s) - goto fail; - } - -- if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { -+ if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { - /* COLO does not support postcopy */ - migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_COLO); -@@ -3435,12 +3435,6 @@ fail: - MIGRATION_STATUS_FAILED); - } - --bool migrate_colo_enabled(void) --{ -- MigrationState *s = migrate_get_current(); -- return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; --} -- - typedef enum MigThrError { - /* No error detected */ - MIG_THR_ERR_NONE = 0, -@@ -3771,7 +3765,7 @@ static void migration_iteration_finish(MigrationState *s) - runstate_set(RUN_STATE_POSTMIGRATE); - break; - case MIGRATION_STATUS_COLO: 
-- if (!migrate_colo_enabled()) { -+ if (!migrate_colo()) { - error_report("%s: critical error: calling COLO code without " - "COLO enabled", __func__); - } -@@ -3967,7 +3961,7 @@ static void *migration_thread(void *opaque) - qemu_savevm_send_postcopy_advise(s->to_dst_file); - } - -- if (migrate_colo_enabled()) { -+ if (migrate_colo()) { - /* Notify migration destination that we enable COLO */ - qemu_savevm_send_colo_enable(s->to_dst_file); - } -diff --git a/migration/migration.h b/migration/migration.h -index a25fed6ef0..42f0c68b6f 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -463,7 +463,6 @@ bool migrate_use_zero_copy_send(void); - int migrate_use_tls(void); - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); --bool migrate_colo_enabled(void); - - bool migrate_use_block(void); - bool migrate_use_block_incremental(void); -diff --git a/migration/options.c b/migration/options.c -index 88a9a45913..bd33c5da0a 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,6 +33,12 @@ bool migrate_background_snapshot(void) - return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - -+bool migrate_colo(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; -+} -+ - bool migrate_dirty_bitmaps(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 0dfa0af245..2a0ee61ff8 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -18,6 +18,7 @@ - - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); -+bool migrate_colo(void); - bool migrate_dirty_bitmaps(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch deleted file mode 100644 index 892ec9e..0000000 --- 
a/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 710fe195a3c13ffe96795a7a2b550c00319997ea Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:44:20 +0100 -Subject: [PATCH 47/56] migration: Move migrate_postcopy() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [46/50] a4f3455b3524a331f44b481bf7a79318aef5abaa (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit f774fde5d4e97cbfc64dab6622c2c53c5fe5c9fe) -Signed-off-by: Peter Xu ---- - migration/migration.c | 5 ----- - migration/migration.h | 2 -- - migration/options.c | 8 ++++++++ - migration/options.h | 9 +++++++++ - 4 files changed, 17 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f27ce30be2..46a5ea4d42 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2245,11 +2245,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --bool migrate_postcopy(void) --{ -- return migrate_postcopy_ram() || migrate_dirty_bitmaps(); --} -- - int migrate_use_tls(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 3ae938b19c..dcf906868d 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); - bool migration_in_postcopy(void); - MigrationState *migrate_get_current(void); - --bool migrate_postcopy(void); -- - int migrate_use_tls(void); - - uint64_t ram_get_total_transferred_pages(void); -diff --git a/migration/options.c b/migration/options.c -index 615534c151..8bd2d949ae 100644 ---- 
a/migration/options.c -+++ b/migration/options.c -@@ -204,6 +204,14 @@ bool migrate_zero_copy_send(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -+ -+/* pseudo capabilities */ -+ -+bool migrate_postcopy(void) -+{ -+ return migrate_postcopy_ram() || migrate_dirty_bitmaps(); -+} -+ - typedef enum WriteTrackingSupport { - WT_SUPPORT_UNKNOWN = 0, - WT_SUPPORT_ABSENT, -diff --git a/migration/options.h b/migration/options.h -index 99f6bbd7a1..093bc907a1 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -38,6 +38,15 @@ bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -+/* -+ * pseudo capabilities -+ * -+ * These are functions that are used in a similar way to capabilities -+ * check, but they are not a capability. -+ */ -+ -+bool migrate_postcopy(void); -+ - /* capabilities helpers */ - - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch deleted file mode 100644 index f7cb338..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 276877a71778a5cef0dc5bc843e2679f0fdabb77 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:23:57 +0100 -Subject: [PATCH 30/56] migration: Move migrate_use_block() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [29/50] fcaeb0e07cf828f3cd0d115515b30d913525a0a2 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_block() -to be consistent with all other capabilities. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 9d4b1e5f22a838285ebeb8f0eb7cc8df1161998f) -Signed-off-by: Peter Xu ---- - migration/block.c | 2 +- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/savevm.c | 2 +- - 6 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/migration/block.c b/migration/block.c -index 4b167fa5cf..f0977217cf 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -1001,7 +1001,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) - - static bool block_is_active(void *opaque) - { -- return migrate_use_block(); -+ return migrate_block(); - } - - static SaveVMHandlers savevm_block_handlers = { -diff --git a/migration/migration.c b/migration/migration.c -index a4ede4294e..96f82bd165 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2415,7 +2415,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -- if (migrate_use_block() || migrate_use_block_incremental()) { -+ if (migrate_block() || migrate_use_block_incremental()) { - error_setg(errp, "Command options are incompatible with " - "current migration capabilities"); - return false; -@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) - return s->parameters.max_postcopy_bandwidth; - } - --bool migrate_use_block(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; --} -- - bool migrate_use_return_path(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index e2bb5b1e2f..d4b68b08a5 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -457,7 +457,6 @@ int migrate_multifd_zstd_level(void); - int migrate_use_tls(void); - uint64_t 
migrate_xbzrle_cache_size(void); - --bool migrate_use_block(void); - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - bool migrate_use_return_path(void); -diff --git a/migration/options.c b/migration/options.c -index 25264c500e..fe1eadeed6 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,6 +33,15 @@ bool migrate_background_snapshot(void) - return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - -+bool migrate_block(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; -+} -+ - bool migrate_colo(void) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.h b/migration/options.h -index 8f76a88329..e985a5233e 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -18,6 +18,7 @@ - - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); -+bool migrate_block(void); - bool migrate_colo(void); - bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); -diff --git a/migration/savevm.c b/migration/savevm.c -index ebcf571e37..9671211339 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1612,7 +1612,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - return -EINVAL; - } - -- if (migrate_use_block()) { -+ if (migrate_block()) { - error_setg(errp, "Block migration and snapshots are incompatible"); - return -EINVAL; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch b/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch deleted file mode 100644 index 3f20289..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch +++ /dev/null @@ -1,121 +0,0 @@ -From def66503f4ccb97cf8029f88efe8e955edc8d32f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:49:47 +0100 -Subject: [PATCH 39/56] migration: Move migrate_use_block_incremental() to - option.c 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [38/50] 961fda6464df3384fbcee88c726b56a33c26e14e (peterx/qemu-kvm) - -To be consistent with every other parameter, rename to -migrate_block_incremental(). - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 6f8be7080a1f79bf3832cf798fba1697c409c597) -Signed-off-by: Peter Xu ---- - migration/block.c | 2 +- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/block.c b/migration/block.c -index f0977217cf..6d532ac7a2 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -417,7 +417,7 @@ static int init_blk_migration(QEMUFile *f) - bmds->bulk_completed = 0; - bmds->total_sectors = sectors; - bmds->completed_sectors = 0; -- bmds->shared_base = migrate_use_block_incremental(); -+ bmds->shared_base = migrate_block_incremental(); - - assert(i < num_bs); - bmds_bs[i].bmds = bmds; -diff --git a/migration/migration.c b/migration/migration.c -index 78bca9a93f..724e841eb9 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2157,7 +2157,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -- if (migrate_block() || migrate_use_block_incremental()) { -+ if (migrate_block() || migrate_block_incremental()) { - error_setg(errp, "Command options are incompatible with " - "current migration capabilities"); - return false; -@@ -2273,15 +2273,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && 
*s->parameters.tls_creds; - } - --bool migrate_use_block_incremental(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.block_incremental; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index 8451e5f2fe..86051af132 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -451,7 +451,6 @@ bool migrate_postcopy(void); - - int migrate_use_tls(void); - --bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - - uint64_t ram_get_total_transferred_pages(void); -diff --git a/migration/options.c b/migration/options.c -index 8d15be858c..2b6d88b4b9 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -463,6 +463,15 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - - /* parameters */ - -+bool migrate_block_incremental(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.block_incremental; -+} -+ - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index b24ee92283..96d5a8e6e4 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -45,6 +45,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); - - /* parameters */ - -+bool migrate_block_incremental(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch deleted file mode 100644 index 8b74183..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch +++ /dev/null @@ -1,183 +0,0 @@ -From ae183bfc9d7b001d3c4929556b095a76203bc08d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:03:48 +0100 -Subject: 
[PATCH 25/56] migration: Move migrate_use_compression() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [24/50] 126b865f51bd4a1ae3a46411fdcd59033bfc5376 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_compress() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit a7a94d14358dd7b445e20c2f26218ff987747642) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 16 ++++++++-------- - 5 files changed, 19 insertions(+), 19 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 59ee0ef82b..c6e32555a8 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1133,7 +1133,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->xbzrle_cache->overflow = xbzrle_counters.overflow; - } - -- if (migrate_use_compression()) { -+ if (migrate_compress()) { - info->compression = g_malloc0(sizeof(*info->compression)); - info->compression->pages = compression_counters.pages; - info->compression->busy = compression_counters.busy; -@@ -2522,15 +2522,6 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --bool migrate_use_compression(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; --} -- - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 42f0c68b6f..77aa91c840 100644 ---- 
a/migration/migration.h -+++ b/migration/migration.h -@@ -471,7 +471,6 @@ bool migrate_use_return_path(void); - - uint64_t ram_get_total_transferred_pages(void); - --bool migrate_use_compression(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -diff --git a/migration/options.c b/migration/options.c -index bd33c5da0a..fa7a13d3dc 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -39,6 +39,15 @@ bool migrate_colo(void) - return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - -+bool migrate_compress(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; -+} -+ - bool migrate_dirty_bitmaps(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 2a0ee61ff8..da2193fd94 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -19,6 +19,7 @@ - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); - bool migrate_colo(void); -+bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); -diff --git a/migration/ram.c b/migration/ram.c -index 912ccd89fa..d050d0c5fd 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -586,7 +586,7 @@ static void compress_threads_save_cleanup(void) - { - int i, thread_count; - -- if (!migrate_use_compression() || !comp_param) { -+ if (!migrate_compress() || !comp_param) { - return; - } - -@@ -625,7 +625,7 @@ static int compress_threads_save_setup(void) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - thread_count = migrate_compress_threads(); -@@ -1155,7 +1155,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - rs->xbzrle_bytes_prev = xbzrle_counters.bytes; - } - -- if (migrate_use_compression()) { -+ if (migrate_compress()) { - 
compression_counters.busy_rate = (double)(compression_counters.busy - - rs->compress_thread_busy_prev) / page_count; - rs->compress_thread_busy_prev = compression_counters.busy; -@@ -2270,7 +2270,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) - - static bool save_page_use_compression(RAMState *rs) - { -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return false; - } - -@@ -3734,7 +3734,7 @@ static int wait_for_decompress_done(void) - { - int idx, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - -@@ -3753,7 +3753,7 @@ static void compress_threads_load_cleanup(void) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return; - } - thread_count = migrate_decompress_threads(); -@@ -3794,7 +3794,7 @@ static int compress_threads_load_setup(QEMUFile *f) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - -@@ -4260,7 +4260,7 @@ static int ram_load_precopy(QEMUFile *f) - int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; - /* ADVISE is earlier, it shows the source has the postcopy capability on */ - bool postcopy_advised = migration_incoming_postcopy_advised(); -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch deleted file mode 100644 index 41e05c3..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 940f1eb4347c72edb3e1abc02c8d7e7c95753dcf Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:08:09 +0100 -Subject: [PATCH 26/56] migration: Move migrate_use_events() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [25/50] b3acd949af2a0fae18061d360e4f51dc12d32c6c (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_events() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b890902c9c025b87d02e718eec3090fd3525ab18) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c6e32555a8..032cd5c050 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -353,7 +353,7 @@ void migration_incoming_state_destroy(void) - - static void migrate_generate_event(int new_state) - { -- if (migrate_use_events()) { -+ if (migrate_events()) { - qapi_event_send_migration(new_state); - } - } -@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_use_events(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; --} -- - bool migrate_use_multifd(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 77aa91c840..bd06520c19 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -475,7 +475,6 @@ int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); --bool migrate_use_events(void); - - /* Sending on the return path - generic and then for each message type */ - void 
migrate_send_rp_shut(MigrationIncomingState *mis, -diff --git a/migration/options.c b/migration/options.c -index fa7a13d3dc..d2219ee0e4 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -57,6 +57,15 @@ bool migrate_dirty_bitmaps(void) - return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - -+bool migrate_events(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; -+} -+ - bool migrate_ignore_shared(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index da2193fd94..b998024eba 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -21,6 +21,7 @@ bool migrate_background_snapshot(void); - bool migrate_colo(void); - bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); -+bool migrate_events(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); - bool migrate_pause_before_switchover(void); -diff --git a/migration/ram.c b/migration/ram.c -index d050d0c5fd..ee454a3849 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1246,7 +1246,7 @@ static void migration_bitmap_sync(RAMState *rs) - rs->num_dirty_pages_period = 0; - rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } -- if (migrate_use_events()) { -+ if (migrate_events()) { - uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); - qapi_event_send_migration_pass(generation); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch deleted file mode 100644 index 97d6597..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch +++ /dev/null @@ -1,247 +0,0 @@ -From afd8fb766af2be5cff97753b026847b91b09a30e Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:10:29 +0100 -Subject: [PATCH 27/56] migration: Move migrate_use_multifd() to options.c -MIME-Version: 
1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [26/50] f2d72eae9cc80b2402ef613e809b40aa296d2e4c (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_multifd() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 51b07548f7c31793adc178c7460c5f4369733c61) -Signed-off-by: Peter Xu ---- - migration/migration.c | 19 +++++-------------- - migration/migration.h | 1 - - migration/multifd.c | 16 ++++++++-------- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - migration/socket.c | 2 +- - 7 files changed, 25 insertions(+), 25 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 032cd5c050..e1d7f25786 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -186,7 +186,7 @@ static void migrate_fd_cancel(MigrationState *s); - - static bool migration_needs_multiple_sockets(void) - { -- return migrate_use_multifd() || migrate_postcopy_preempt(); -+ return migrate_multifd() || migrate_postcopy_preempt(); - } - - static bool uri_supports_multi_channels(const char *uri) -@@ -732,7 +732,7 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp) - static bool migration_should_start_incoming(bool main_channel) - { - /* Multifd doesn't start unless all channels are established */ -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - return migration_has_all_channels(); - } - -@@ -759,7 +759,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - uint32_t channel_magic = 0; - int ret = 0; - -- if (migrate_use_multifd() && !migrate_postcopy_ram() && -+ if (migrate_multifd() && 
!migrate_postcopy_ram() && - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { - /* - * With multiple channels, it is possible that we receive channels -@@ -798,7 +798,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - } else { - /* Multiple connections */ - assert(migration_needs_multiple_sockets()); -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - multifd_recv_new_channel(ioc, &local_err); - } else { - assert(migrate_postcopy_preempt()); -@@ -834,7 +834,7 @@ bool migration_has_all_channels(void) - return false; - } - -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - return multifd_recv_all_channels_created(); - } - -@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_use_multifd(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; --} -- - int migrate_multifd_channels(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index bd06520c19..49c0e13f41 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,7 +449,6 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --bool migrate_use_multifd(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); -diff --git a/migration/multifd.c b/migration/multifd.c -index 903df2117b..6807328189 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -516,7 +516,7 @@ void multifd_save_cleanup(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - multifd_send_terminate_threads(NULL); -@@ -587,7 +587,7 @@ int multifd_send_sync_main(QEMUFile *f) - int i; - bool flush_zero_copy; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return 0; - } - if (multifd_send_state->pages->num) { -@@ -911,7 
+911,7 @@ int multifd_save_setup(Error **errp) - uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); - uint8_t i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return 0; - } - -@@ -1016,7 +1016,7 @@ static void multifd_recv_terminate_threads(Error *err) - - void multifd_load_shutdown(void) - { -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - multifd_recv_terminate_threads(NULL); - } - } -@@ -1025,7 +1025,7 @@ void multifd_load_cleanup(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - multifd_recv_terminate_threads(NULL); -@@ -1072,7 +1072,7 @@ void multifd_recv_sync_main(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - for (i = 0; i < migrate_multifd_channels(); i++) { -@@ -1170,7 +1170,7 @@ int multifd_load_setup(Error **errp) - * Return successfully if multiFD recv state is already initialised - * or multiFD is not enabled. - */ -- if (multifd_recv_state || !migrate_use_multifd()) { -+ if (multifd_recv_state || !migrate_multifd()) { - return 0; - } - -@@ -1216,7 +1216,7 @@ bool multifd_recv_all_channels_created(void) - { - int thread_count = migrate_multifd_channels(); - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return true; - } - -diff --git a/migration/options.c b/migration/options.c -index d2219ee0e4..58673fc101 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -84,6 +84,15 @@ bool migrate_late_block_activate(void) - return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - -+bool migrate_multifd(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; -+} -+ - bool migrate_pause_before_switchover(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index b998024eba..d07269ee38 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -24,6 +24,7 @@ bool 
migrate_dirty_bitmaps(void); - bool migrate_events(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); -+bool migrate_multifd(void); - bool migrate_pause_before_switchover(void); - bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); -diff --git a/migration/ram.c b/migration/ram.c -index ee454a3849..859dd7b63f 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2362,7 +2362,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) - * if host page size == guest page size the dest guest during run may - * still see partially copied pages which is data corruption. - */ -- if (migrate_use_multifd() && !migration_in_postcopy()) { -+ if (migrate_multifd() && !migration_in_postcopy()) { - return ram_save_multifd_page(pss->pss_channel, block, offset); - } - -diff --git a/migration/socket.c b/migration/socket.c -index ebf9ac41af..f4835a256a 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -183,7 +183,7 @@ socket_start_incoming_migration_internal(SocketAddress *saddr, - - qio_net_listener_set_name(listener, "migration-socket-listener"); - -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - num = migrate_multifd_channels(); - } else if (migrate_postcopy_preempt()) { - num = RAM_CHANNEL_MAX; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch deleted file mode 100644 index b250d40..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 145b630767dbc7020ddf39b20075f4691f71321a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:25:47 +0100 -Subject: [PATCH 31/56] migration: Move migrate_use_return() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for 
postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [30/50] 5cc150188bcc61b69ea0844253597594ab18fc13 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_return_path() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 38ad1110e368bf91453c0abbd657224d57b65d47) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/rdma.c | 6 +++--- - 5 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 96f82bd165..f7facecd66 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) - return s->parameters.max_postcopy_bandwidth; - } - --bool migrate_use_return_path(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; --} -- - bool migrate_use_block_incremental(void) - { - MigrationState *s; -@@ -4175,7 +4166,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - * precopy, only if user specified "return-path" capability would - * QEMU uses the return path. 
- */ -- if (migrate_postcopy_ram() || migrate_use_return_path()) { -+ if (migrate_postcopy_ram() || migrate_return_path()) { - if (open_return_path_on_source(s, !resume)) { - error_report("Unable to open return-path for postcopy"); - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); -diff --git a/migration/migration.h b/migration/migration.h -index d4b68b08a5..24184622a8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -459,7 +459,6 @@ uint64_t migrate_xbzrle_cache_size(void); - - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); --bool migrate_use_return_path(void); - - uint64_t ram_get_total_transferred_pages(void); - -diff --git a/migration/options.c b/migration/options.c -index fe1eadeed6..2003e413da 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -147,6 +147,15 @@ bool migrate_release_ram(void) - return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - -+bool migrate_return_path(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; -+} -+ - bool migrate_validate_uuid(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index e985a5233e..316efd1063 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -31,6 +31,7 @@ bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); -+bool migrate_return_path(void); - bool migrate_validate_uuid(void); - bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); -diff --git a/migration/rdma.c b/migration/rdma.c -index f35f021963..bf55e2f163 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -3373,7 +3373,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) - * initialize the RDMAContext for return path for postcopy after first - * connection request reached. 
- */ -- if ((migrate_postcopy() || migrate_use_return_path()) -+ if ((migrate_postcopy() || migrate_return_path()) - && !rdma->is_return_path) { - rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL); - if (rdma_return_path == NULL) { -@@ -3456,7 +3456,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) - } - - /* Accept the second connection request for return path */ -- if ((migrate_postcopy() || migrate_use_return_path()) -+ if ((migrate_postcopy() || migrate_return_path()) - && !rdma->is_return_path) { - qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, - NULL, -@@ -4193,7 +4193,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - /* RDMA postcopy need a separate queue pair for return path */ -- if (migrate_postcopy() || migrate_use_return_path()) { -+ if (migrate_postcopy() || migrate_return_path()) { - rdma_return_path = qemu_rdma_data_init(host_port, errp); - - if (rdma_return_path == NULL) { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch deleted file mode 100644 index 84734af..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 2e2df63892e191e91216b8253171162f69b93387 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:41:23 +0100 -Subject: [PATCH 49/56] migration: Move migrate_use_tls() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [48/50] 314431b0f5e92d2211e58a8161f32d7b67d69e38 (peterx/qemu-kvm) - -Once there, rename it to migrate_tls() and make it return bool for -consistency. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy - ---- - -Fix typos found by fabiano - -(cherry picked from commit 10d4703be5d884bbbb6ecafe0e8bb270ad6ea937) -Signed-off-by: Peter Xu ---- - migration/migration.c | 9 --------- - migration/migration.h | 2 -- - migration/options.c | 11 ++++++++++- - migration/options.h | 1 + - migration/tls.c | 3 ++- - 5 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c2e109329d..22ef83c619 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2177,15 +2177,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --int migrate_use_tls(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.tls_creds && *s->parameters.tls_creds; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index dcf906868d..2b71df8617 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); - bool migration_in_postcopy(void); - MigrationState *migrate_get_current(void); - --int migrate_use_tls(void); -- - uint64_t ram_get_total_transferred_pages(void); - - /* Sending on the return path - generic and then for each message type */ -diff --git a/migration/options.c b/migration/options.c -index 8e8753d9be..d4c0714683 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -214,6 +214,15 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - -+bool migrate_tls(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.tls_creds && *s->parameters.tls_creds; -+} -+ - typedef enum WriteTrackingSupport { - WT_SUPPORT_UNKNOWN = 0, - WT_SUPPORT_ABSENT, -@@ -363,7 +372,7 @@ bool migrate_caps_check(bool *old_caps, bool 
*new_caps, Error **errp) - new_caps[MIGRATION_CAPABILITY_COMPRESS] || - new_caps[MIGRATION_CAPABILITY_XBZRLE] || - migrate_multifd_compression() || -- migrate_use_tls())) { -+ migrate_tls())) { - error_setg(errp, - "Zero copy only available for non-compressed non-TLS multifd migration"); - return false; -diff --git a/migration/options.h b/migration/options.h -index 1b78fa9f3d..13318a16c7 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -46,6 +46,7 @@ bool migrate_zero_copy_send(void); - */ - - bool migrate_postcopy(void); -+bool migrate_tls(void); - - /* capabilities helpers */ - -diff --git a/migration/tls.c b/migration/tls.c -index 4d2166a209..acd38e0b62 100644 ---- a/migration/tls.c -+++ b/migration/tls.c -@@ -22,6 +22,7 @@ - #include "channel.h" - #include "migration.h" - #include "tls.h" -+#include "options.h" - #include "crypto/tlscreds.h" - #include "qemu/error-report.h" - #include "qapi/error.h" -@@ -165,7 +166,7 @@ void migration_tls_channel_connect(MigrationState *s, - - bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc) - { -- if (!migrate_use_tls()) { -+ if (!migrate_tls()) { - return false; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch deleted file mode 100644 index e3a8bab..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 2184f7dae0df5fa52deba2dc884e09c6bdbc7b5f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:20:13 +0100 -Subject: [PATCH 29/56] migration: Move migrate_use_xbzrle() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [28/50] 
fc8bee0f691a96e6bd0b41f2511abe507b81fea5 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_xbzrle() -to be consistent with all other capabilities. -We change the type to return bool also for consistency. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 87dca0c9bb63014ef73ad82f7aedea1cb5a822e7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 10 +++++----- - 5 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 1d63718e88..a4ede4294e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1122,7 +1122,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); - info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); - info->xbzrle_cache->bytes = xbzrle_counters.bytes; -@@ -2604,15 +2604,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && *s->parameters.tls_creds; - } - --int migrate_use_xbzrle(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; --} -- - uint64_t migrate_xbzrle_cache_size(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index c939f82d53..e2bb5b1e2f 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -455,7 +455,6 @@ int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); - - int migrate_use_tls(void); --int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); - - bool 
migrate_use_block(void); -diff --git a/migration/options.c b/migration/options.c -index f357c99996..25264c500e 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -147,6 +147,15 @@ bool migrate_validate_uuid(void) - return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - -+bool migrate_xbzrle(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; -+} -+ - bool migrate_zero_blocks(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index ad22f4d24a..8f76a88329 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -31,6 +31,7 @@ bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); - bool migrate_validate_uuid(void); -+bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -diff --git a/migration/ram.c b/migration/ram.c -index 859dd7b63f..4576d0d849 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -156,14 +156,14 @@ static struct { - - static void XBZRLE_cache_lock(void) - { -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - qemu_mutex_lock(&XBZRLE.lock); - } - } - - static void XBZRLE_cache_unlock(void) - { -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - qemu_mutex_unlock(&XBZRLE.lock); - } - } -@@ -1137,7 +1137,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - return; - } - -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - double encoded_size, unencoded_size; - - xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - -@@ -1626,7 +1626,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) - /* Flag that we've looped */ - pss->complete_round = true; - /* After the first round, enable XBZRLE. 
*/ -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - rs->xbzrle_enabled = true; - } - } -@@ -2979,7 +2979,7 @@ static int xbzrle_init(void) - { - Error *local_err = NULL; - -- if (!migrate_use_xbzrle()) { -+ if (!migrate_xbzrle()) { - return 0; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch b/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch deleted file mode 100644 index 90031df..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 6eb252887378d639ad2e90dd426a1812d4b72ca6 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:17:14 +0100 -Subject: [PATCH 28/56] migration: Move migrate_use_zero_copy_send() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [27/50] 5a4c2b5e75c62e0f60f9c4121a2756bd140a60d9 (peterx/qemu-kvm) - -Once that we are there, we rename the function to -migrate_zero_copy_send() to be consistent with all other capabilities. - -We can remove the CONFIG_LINUX guard. We already check that we can't -setup this capability in migrate_caps_check(). 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b4bc342c766640e0cb8a0b72f71e0ee5545fb790) -Signed-off-by: Peter Xu ---- - migration/migration.c | 13 +------------ - migration/migration.h | 5 ----- - migration/multifd.c | 8 ++++---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/socket.c | 2 +- - 6 files changed, 16 insertions(+), 22 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index e1d7f25786..1d63718e88 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1609,7 +1609,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - } - - #ifdef CONFIG_LINUX -- if (migrate_use_zero_copy_send() && -+ if (migrate_zero_copy_send() && - ((params->has_multifd_compression && params->multifd_compression) || - (params->tls_creds && *params->tls_creds))) { - error_setg(errp, -@@ -2595,17 +2595,6 @@ int migrate_multifd_zstd_level(void) - return s->parameters.multifd_zstd_level; - } - --#ifdef CONFIG_LINUX --bool migrate_use_zero_copy_send(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; --} --#endif -- - int migrate_use_tls(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 49c0e13f41..c939f82d53 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -454,11 +454,6 @@ MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); - --#ifdef CONFIG_LINUX --bool migrate_use_zero_copy_send(void); --#else --#define migrate_use_zero_copy_send() (false) --#endif - int migrate_use_tls(void); - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); -diff --git a/migration/multifd.c b/migration/multifd.c -index 6807328189..cce3ad6988 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -25,7 
+25,7 @@ - #include "trace.h" - #include "multifd.h" - #include "threadinfo.h" -- -+#include "options.h" - #include "qemu/yank.h" - #include "io/channel-socket.h" - #include "yank_functions.h" -@@ -608,7 +608,7 @@ int multifd_send_sync_main(QEMUFile *f) - * all the dirty bitmaps. - */ - -- flush_zero_copy = migrate_use_zero_copy_send(); -+ flush_zero_copy = migrate_zero_copy_send(); - - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; -@@ -653,7 +653,7 @@ static void *multifd_send_thread(void *opaque) - MigrationThread *thread = NULL; - Error *local_err = NULL; - int ret = 0; -- bool use_zero_copy_send = migrate_use_zero_copy_send(); -+ bool use_zero_copy_send = migrate_zero_copy_send(); - - thread = MigrationThreadAdd(p->name, qemu_get_thread_id()); - -@@ -945,7 +945,7 @@ int multifd_save_setup(Error **errp) - p->page_size = qemu_target_page_size(); - p->page_count = page_count; - -- if (migrate_use_zero_copy_send()) { -+ if (migrate_zero_copy_send()) { - p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; - } else { - p->write_flags = 0; -diff --git a/migration/options.c b/migration/options.c -index 58673fc101..f357c99996 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -155,3 +155,12 @@ bool migrate_zero_blocks(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } -+ -+bool migrate_zero_copy_send(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; -+} -diff --git a/migration/options.h b/migration/options.h -index d07269ee38..ad22f4d24a 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -32,5 +32,6 @@ bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); - bool migrate_validate_uuid(void); - bool migrate_zero_blocks(void); -+bool migrate_zero_copy_send(void); - - #endif -diff --git a/migration/socket.c b/migration/socket.c -index f4835a256a..1b6f5baefb 100644 ---- 
a/migration/socket.c -+++ b/migration/socket.c -@@ -98,7 +98,7 @@ static void socket_outgoing_migration(QIOTask *task, - - trace_migration_socket_outgoing_connected(data->hostname); - -- if (migrate_use_zero_copy_send() && -+ if (migrate_zero_copy_send() && - !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { - error_setg(&err, "Zero copy send feature not detected in host kernel"); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch b/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch deleted file mode 100644 index 145b510..0000000 --- a/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch +++ /dev/null @@ -1,409 +0,0 @@ -From 0911e025a9dc8a0c85944ac11fb9df72e5ad0677 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 09/37] migration: Move migration_properties to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/28] ff07358afa0c90f13125b177b0e08c74ef1b9905 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit f9436522c8dd -Author: Juan Quintela -Date: Thu Mar 2 12:55:57 2023 +0100 - - migration: Move migration_properties to options.c - - Signed-off-by: Juan Quintela - Reviewed-by: Vladimir Sementsov-Ogievskiy - -Signed-off-by: Cédric Le Goater ---- - migration/migration.c | 157 ------------------------------------------ - migration/options.c | 155 +++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 7 ++ - 3 files changed, 162 insertions(+), 157 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 08f87f2b0e..1ac5f19bc2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -52,8 +52,6 @@ 
- #include "io/channel-tls.h" - #include "migration/colo.h" - #include "hw/boards.h" --#include "hw/qdev-properties.h" --#include "hw/qdev-properties-system.h" - #include "monitor/monitor.h" - #include "net/announce.h" - #include "qemu/queue.h" -@@ -65,51 +63,6 @@ - #include "sysemu/qtest.h" - #include "options.h" - --#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ -- --/* Time in milliseconds we are allowed to stop the source, -- * for sending the last part */ --#define DEFAULT_MIGRATE_SET_DOWNTIME 300 -- --/* Default compression thread count */ --#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 --/* Default decompression thread count, usually decompression is at -- * least 4 times as fast as compression.*/ --#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 --/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ --#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 --/* Define default autoconverge cpu throttle migration parameters */ --#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 --#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 --#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 --#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 -- --/* Migration XBZRLE default cache size */ --#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) -- --/* The delay time (in ms) between two COLO checkpoints */ --#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) --#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 --#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE --/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ --#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 --/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ --#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 -- --/* Background transfer rate for postcopy, 0 means unlimited, note -- * that page requests can still exceed this limit. 
-- */ --#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 -- --/* -- * Parameters for self_announce_delay giving a stream of RARP/ARP -- * packets after migration. -- */ --#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 --#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 --#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 --#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 -- - static NotifierList migration_state_notifiers = - NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); - -@@ -3336,116 +3289,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - s->migration_thread_running = true; - } - --#define DEFINE_PROP_MIG_CAP(name, x) \ -- DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) -- --static Property migration_properties[] = { -- DEFINE_PROP_BOOL("store-global-state", MigrationState, -- store_global_state, true), -- DEFINE_PROP_BOOL("send-configuration", MigrationState, -- send_configuration, true), -- DEFINE_PROP_BOOL("send-section-footer", MigrationState, -- send_section_footer, true), -- DEFINE_PROP_BOOL("decompress-error-check", MigrationState, -- decompress_error_check, true), -- DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, -- clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), -- DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -- preempt_pre_7_2, false), -- -- /* Migration parameters */ -- DEFINE_PROP_UINT8("x-compress-level", MigrationState, -- parameters.compress_level, -- DEFAULT_MIGRATE_COMPRESS_LEVEL), -- DEFINE_PROP_UINT8("x-compress-threads", MigrationState, -- parameters.compress_threads, -- DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), -- DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, -- parameters.compress_wait_thread, true), -- DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, -- parameters.decompress_threads, -- DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), -- DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, -- parameters.throttle_trigger_threshold, -- DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), -- 
DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, -- parameters.cpu_throttle_initial, -- DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), -- DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, -- parameters.cpu_throttle_increment, -- DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), -- DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, -- parameters.cpu_throttle_tailslow, false), -- DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, -- parameters.max_bandwidth, MAX_THROTTLE), -- DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, -- parameters.downtime_limit, -- DEFAULT_MIGRATE_SET_DOWNTIME), -- DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, -- parameters.x_checkpoint_delay, -- DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), -- DEFINE_PROP_UINT8("multifd-channels", MigrationState, -- parameters.multifd_channels, -- DEFAULT_MIGRATE_MULTIFD_CHANNELS), -- DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, -- parameters.multifd_compression, -- DEFAULT_MIGRATE_MULTIFD_COMPRESSION), -- DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, -- parameters.multifd_zlib_level, -- DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), -- DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, -- parameters.multifd_zstd_level, -- DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -- DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, -- parameters.xbzrle_cache_size, -- DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -- DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, -- parameters.max_postcopy_bandwidth, -- DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), -- DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, -- parameters.max_cpu_throttle, -- DEFAULT_MIGRATE_MAX_CPU_THROTTLE), -- DEFINE_PROP_SIZE("announce-initial", MigrationState, -- parameters.announce_initial, -- DEFAULT_MIGRATE_ANNOUNCE_INITIAL), -- DEFINE_PROP_SIZE("announce-max", MigrationState, -- parameters.announce_max, -- DEFAULT_MIGRATE_ANNOUNCE_MAX), -- DEFINE_PROP_SIZE("announce-rounds", MigrationState, -- 
parameters.announce_rounds, -- DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), -- DEFINE_PROP_SIZE("announce-step", MigrationState, -- parameters.announce_step, -- DEFAULT_MIGRATE_ANNOUNCE_STEP), -- DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), -- DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), -- DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), -- -- /* Migration capabilities */ -- DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), -- DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), -- DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), -- DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), -- DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), -- DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), -- DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), -- DEFINE_PROP_MIG_CAP("x-postcopy-preempt", -- MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), -- DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), -- DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), -- DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), -- DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), -- DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), -- DEFINE_PROP_MIG_CAP("x-background-snapshot", -- MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), --#ifdef CONFIG_LINUX -- DEFINE_PROP_MIG_CAP("x-zero-copy-send", -- MIGRATION_CAPABILITY_ZERO_COPY_SEND), --#endif -- -- DEFINE_PROP_END_OF_LIST(), --}; -- - static void migration_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -diff --git a/migration/options.c b/migration/options.c -index bcfe244fa9..a76984276d 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -31,6 +31,161 @@ - #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 - #define MAX_MIGRATE_DOWNTIME 
(MAX_MIGRATE_DOWNTIME_SECONDS * 1000) - -+#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ -+ -+/* Time in milliseconds we are allowed to stop the source, -+ * for sending the last part */ -+#define DEFAULT_MIGRATE_SET_DOWNTIME 300 -+ -+/* Default compression thread count */ -+#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 -+/* Default decompression thread count, usually decompression is at -+ * least 4 times as fast as compression.*/ -+#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 -+/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ -+#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 -+/* Define default autoconverge cpu throttle migration parameters */ -+#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 -+#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 -+#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 -+#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 -+ -+/* Migration XBZRLE default cache size */ -+#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) -+ -+/* The delay time (in ms) between two COLO checkpoints */ -+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) -+#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 -+#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE -+/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ -+#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 -+/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ -+#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 -+ -+/* Background transfer rate for postcopy, 0 means unlimited, note -+ * that page requests can still exceed this limit. -+ */ -+#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 -+ -+/* -+ * Parameters for self_announce_delay giving a stream of RARP/ARP -+ * packets after migration. 
-+ */ -+#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 -+#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 -+#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 -+#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 -+ -+#define DEFINE_PROP_MIG_CAP(name, x) \ -+ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) -+ -+Property migration_properties[] = { -+ DEFINE_PROP_BOOL("store-global-state", MigrationState, -+ store_global_state, true), -+ DEFINE_PROP_BOOL("send-configuration", MigrationState, -+ send_configuration, true), -+ DEFINE_PROP_BOOL("send-section-footer", MigrationState, -+ send_section_footer, true), -+ DEFINE_PROP_BOOL("decompress-error-check", MigrationState, -+ decompress_error_check, true), -+ DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, -+ clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), -+ DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -+ preempt_pre_7_2, false), -+ -+ /* Migration parameters */ -+ DEFINE_PROP_UINT8("x-compress-level", MigrationState, -+ parameters.compress_level, -+ DEFAULT_MIGRATE_COMPRESS_LEVEL), -+ DEFINE_PROP_UINT8("x-compress-threads", MigrationState, -+ parameters.compress_threads, -+ DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), -+ DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, -+ parameters.compress_wait_thread, true), -+ DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, -+ parameters.decompress_threads, -+ DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), -+ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, -+ parameters.throttle_trigger_threshold, -+ DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), -+ DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, -+ parameters.cpu_throttle_initial, -+ DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), -+ DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, -+ parameters.cpu_throttle_increment, -+ DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), -+ DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, -+ parameters.cpu_throttle_tailslow, false), -+ 
DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, -+ parameters.max_bandwidth, MAX_THROTTLE), -+ DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, -+ parameters.downtime_limit, -+ DEFAULT_MIGRATE_SET_DOWNTIME), -+ DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, -+ parameters.x_checkpoint_delay, -+ DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), -+ DEFINE_PROP_UINT8("multifd-channels", MigrationState, -+ parameters.multifd_channels, -+ DEFAULT_MIGRATE_MULTIFD_CHANNELS), -+ DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, -+ parameters.multifd_compression, -+ DEFAULT_MIGRATE_MULTIFD_COMPRESSION), -+ DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, -+ parameters.multifd_zlib_level, -+ DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), -+ DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, -+ parameters.multifd_zstd_level, -+ DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -+ DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, -+ parameters.xbzrle_cache_size, -+ DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -+ DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, -+ parameters.max_postcopy_bandwidth, -+ DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), -+ DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, -+ parameters.max_cpu_throttle, -+ DEFAULT_MIGRATE_MAX_CPU_THROTTLE), -+ DEFINE_PROP_SIZE("announce-initial", MigrationState, -+ parameters.announce_initial, -+ DEFAULT_MIGRATE_ANNOUNCE_INITIAL), -+ DEFINE_PROP_SIZE("announce-max", MigrationState, -+ parameters.announce_max, -+ DEFAULT_MIGRATE_ANNOUNCE_MAX), -+ DEFINE_PROP_SIZE("announce-rounds", MigrationState, -+ parameters.announce_rounds, -+ DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), -+ DEFINE_PROP_SIZE("announce-step", MigrationState, -+ parameters.announce_step, -+ DEFAULT_MIGRATE_ANNOUNCE_STEP), -+ DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), -+ DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), -+ DEFINE_PROP_STRING("tls-authz", MigrationState, 
parameters.tls_authz), -+ -+ /* Migration capabilities */ -+ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), -+ DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), -+ DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), -+ DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), -+ DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), -+ DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), -+ DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), -+ DEFINE_PROP_MIG_CAP("x-postcopy-preempt", -+ MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), -+ DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), -+ DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), -+ DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), -+ DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), -+ DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), -+ DEFINE_PROP_MIG_CAP("x-background-snapshot", -+ MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), -+#ifdef CONFIG_LINUX -+ DEFINE_PROP_MIG_CAP("x-zero-copy-send", -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND), -+#endif -+ -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ - bool migrate_auto_converge(void) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.h b/migration/options.h -index 89067e59a0..7b0f7245ad 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -14,6 +14,9 @@ - #ifndef QEMU_MIGRATION_OPTIONS_H - #define QEMU_MIGRATION_OPTIONS_H - -+#include "hw/qdev-properties.h" -+#include "hw/qdev-properties-system.h" -+ - /* constants */ - - /* Amount of time to allocate to each "chunk" of bandwidth-throttled -@@ -21,6 +24,10 @@ - #define BUFFER_DELAY 100 - #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) - -+/* migration properties */ -+ -+extern Property migration_properties[]; -+ - /* capabilities */ - - bool migrate_auto_converge(void); --- -2.39.3 - diff --git 
a/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch b/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch deleted file mode 100644 index 10e5fe7..0000000 --- a/SOURCES/kvm-migration-Move-more-initializations-to-migrate_init.patch +++ /dev/null @@ -1,94 +0,0 @@ -From a90cae0dae6382cc1af63dfed8a51a3a27dc4bae Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 11 Sep 2023 16:10:19 +0200 -Subject: [PATCH 2/4] migration: Move more initializations to migrate_init() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled -RH-Bugzilla: 2229868 -RH-Acked-by: Alex Williamson -RH-Acked-by: Peter Xu -RH-Commit: [2/4] 3706a3308c33046e2658ee511b364087e202708e - -Bugzilla: https://bugzilla.redhat.com/2229868 - -commit f543aa222da183ac37424d1ea3a65e5fb6202732 -Author: Avihai Horon -Date: Wed Sep 6 18:08:50 2023 +0300 - - migration: Move more initializations to migrate_init() - - Initialization of mig_stats, compression_counters and VFIO bytes - transferred is hard-coded in migration code path and snapshot code path. - - Make the code cleaner by initializing them in migrate_init(). 
- - Suggested-by: Cédric Le Goater - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c, migration/savevm.c - context changes in migrate_prepare() and qemu_savevm_state() due - to missing commit aff3f6606d14 ("migration: Rename ram_counters - to mig_stats") - -Signed-off-by: Cédric Le Goater ---- - migration/migration.c | 14 +++++++------- - migration/savevm.c | 3 --- - 2 files changed, 7 insertions(+), 10 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 5aa9e5dada..a85c8936d9 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1422,6 +1422,13 @@ void migrate_init(MigrationState *s) - s->iteration_initial_bytes = 0; - s->threshold_size = 0; - s->switchover_acked = false; -+ /* -+ * set mig_stats compression_counters memory to zero for a -+ * new migration -+ */ -+ memset(&ram_counters, 0, sizeof(ram_counters)); -+ memset(&compression_counters, 0, sizeof(compression_counters)); -+ migration_reset_vfio_bytes_transferred(); - } - - int migrate_add_blocker_internal(Error *reason, Error **errp) -@@ -1632,13 +1639,6 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - } - - migrate_init(s); -- /* -- * set ram_counters compression_counters memory to zero for a -- * new migration -- */ -- memset(&ram_counters, 0, sizeof(ram_counters)); -- memset(&compression_counters, 0, sizeof(compression_counters)); -- migration_reset_vfio_bytes_transferred(); - - return true; - } -diff --git a/migration/savevm.c b/migration/savevm.c -index 05db79bfad..13c1a9afa1 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1618,9 +1618,6 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - } - - migrate_init(ms); -- memset(&ram_counters, 0, sizeof(ram_counters)); -- memset(&compression_counters, 0, sizeof(compression_counters)); -- migration_reset_vfio_bytes_transferred(); - ms->to_dst_file = f; - - 
qemu_mutex_unlock_iothread(); --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch b/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch deleted file mode 100644 index ad4510b..0000000 --- a/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch +++ /dev/null @@ -1,317 +0,0 @@ -From d5ea4c82c44a59ac70313eb1eac77999ca5fde36 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:39:03 +0100 -Subject: [PATCH 37/56] migration: Move parameters functions to option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [36/50] 2540921028025504723e762c0a1d2f295ac5a6d1 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 1dfc4b9e19bcf1ad41a1be9ac82db35b9647c3c1) -Signed-off-by: Peter Xu ---- - migration/migration.c | 91 --------------------------------------- - migration/migration.h | 11 ----- - migration/multifd-zlib.c | 1 + - migration/multifd-zstd.c | 1 + - migration/options.c | 93 ++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 13 ++++++ - 6 files changed, 108 insertions(+), 102 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 880a51210e..7f2e770deb 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2264,79 +2264,6 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --int migrate_compress_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.compress_level; --} -- --int migrate_compress_threads(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return 
s->parameters.compress_threads; --} -- --int migrate_compress_wait_thread(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.compress_wait_thread; --} -- --int migrate_decompress_threads(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.decompress_threads; --} -- --int migrate_multifd_channels(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_channels; --} -- --MultiFDCompression migrate_multifd_compression(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); -- return s->parameters.multifd_compression; --} -- --int migrate_multifd_zlib_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_zlib_level; --} -- --int migrate_multifd_zstd_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_zstd_level; --} -- - int migrate_use_tls(void) - { - MigrationState *s; -@@ -2346,24 +2273,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && *s->parameters.tls_creds; - } - --uint64_t migrate_xbzrle_cache_size(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.xbzrle_cache_size; --} -- --static int64_t migrate_max_postcopy_bandwidth(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.max_postcopy_bandwidth; --} -- - bool migrate_use_block_incremental(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 24184622a8..8451e5f2fe 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,24 +449,13 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --int migrate_multifd_channels(void); --MultiFDCompression migrate_multifd_compression(void); --int migrate_multifd_zlib_level(void); 
--int migrate_multifd_zstd_level(void); -- - int migrate_use_tls(void); --uint64_t migrate_xbzrle_cache_size(void); - - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - - uint64_t ram_get_total_transferred_pages(void); - --int migrate_compress_level(void); --int migrate_compress_threads(void); --int migrate_compress_wait_thread(void); --int migrate_decompress_threads(void); -- - /* Sending on the return path - generic and then for each message type */ - void migrate_send_rp_shut(MigrationIncomingState *mis, - uint32_t value); -diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c -index 37770248e1..81701250ad 100644 ---- a/migration/multifd-zlib.c -+++ b/migration/multifd-zlib.c -@@ -18,6 +18,7 @@ - #include "qapi/error.h" - #include "migration.h" - #include "trace.h" -+#include "options.h" - #include "multifd.h" - - struct zlib_data { -diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c -index f4a8e1ed1f..d1d29e76cc 100644 ---- a/migration/multifd-zstd.c -+++ b/migration/multifd-zstd.c -@@ -18,6 +18,7 @@ - #include "qapi/error.h" - #include "migration.h" - #include "trace.h" -+#include "options.h" - #include "multifd.h" - - struct zstd_data { -diff --git a/migration/options.c b/migration/options.c -index f3b2d6e482..8d15be858c 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -460,3 +460,96 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - s->capabilities[cap->value->capability] = cap->value->state; - } - } -+ -+/* parameters */ -+ -+int migrate_compress_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_level; -+} -+ -+int migrate_compress_threads(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_threads; -+} -+ -+int migrate_compress_wait_thread(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_wait_thread; 
-+} -+ -+int migrate_decompress_threads(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.decompress_threads; -+} -+ -+int64_t migrate_max_postcopy_bandwidth(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_postcopy_bandwidth; -+} -+ -+int migrate_multifd_channels(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_channels; -+} -+ -+MultiFDCompression migrate_multifd_compression(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); -+ return s->parameters.multifd_compression; -+} -+ -+int migrate_multifd_zlib_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_zlib_level; -+} -+ -+int migrate_multifd_zstd_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_zstd_level; -+} -+ -+uint64_t migrate_xbzrle_cache_size(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.xbzrle_cache_size; -+} -diff --git a/migration/options.h b/migration/options.h -index 5979e4ff90..b24ee92283 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -43,4 +43,17 @@ bool migrate_zero_copy_send(void); - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); - bool migrate_cap_set(int cap, bool value, Error **errp); - -+/* parameters */ -+ -+int migrate_compress_level(void); -+int migrate_compress_threads(void); -+int migrate_compress_wait_thread(void); -+int migrate_decompress_threads(void); -+int64_t migrate_max_postcopy_bandwidth(void); -+int migrate_multifd_channels(void); -+MultiFDCompression migrate_multifd_compression(void); -+int migrate_multifd_zlib_level(void); -+int migrate_multifd_zstd_level(void); -+uint64_t migrate_xbzrle_cache_size(void); -+ - #endif --- -2.39.1 - diff --git 
a/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch b/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch deleted file mode 100644 index 10f185b..0000000 --- a/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch +++ /dev/null @@ -1,100 +0,0 @@ -From d967ec22cdb20e0a846f050a2bc7bd4caa87940d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:18:02 +0100 -Subject: [PATCH 35/56] migration: Move qmp_migrate_set_capabilities() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [34/50] 16b62ca7e06c58d71389c449dc19c11939dd0882 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 45c1de13f09b1fd4ea26f54e6da12aae52f34cb8) -Signed-off-by: Peter Xu ---- - migration/migration.c | 26 -------------------------- - migration/options.c | 26 ++++++++++++++++++++++++++ - 2 files changed, 26 insertions(+), 26 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 3dc8ee4875..369cd91796 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1222,32 +1222,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) - return info; - } - --void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, -- Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- MigrationCapabilityStatusList *cap; -- bool new_caps[MIGRATION_CAPABILITY__MAX]; -- -- if (migration_is_running(s->state)) { -- error_setg(errp, QERR_MIGRATION_ACTIVE); -- return; -- } -- -- memcpy(new_caps, s->capabilities, sizeof(new_caps)); -- for (cap = params; cap; cap = cap->next) { -- new_caps[cap->value->capability] = cap->value->state; -- } -- 
-- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -- return; -- } -- -- for (cap = params; cap; cap = cap->next) { -- s->capabilities[cap->value->capability] = cap->value->state; -- } --} -- - /* - * Check whether the parameters are valid. Error will be put into errp - * (if provided). Return true if valid, otherwise false. -diff --git a/migration/options.c b/migration/options.c -index ff621bdeb3..4cbe77e35a 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -413,3 +413,29 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - - return head; - } -+ -+void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, -+ Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ MigrationCapabilityStatusList *cap; -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; -+ -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return; -+ } -+ -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ for (cap = params; cap; cap = cap->next) { -+ new_caps[cap->value->capability] = cap->value->state; -+ } -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return; -+ } -+ -+ for (cap = params; cap; cap = cap->next) { -+ s->capabilities[cap->value->capability] = cap->value->state; -+ } -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch b/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch deleted file mode 100644 index 3685a33..0000000 --- a/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch +++ /dev/null @@ -1,943 +0,0 @@ -From 944bf4759d1279c342ddd29c47d47c9670b64625 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:13:16 +0100 -Subject: [PATCH 50/56] migration: Move qmp_migrate_set_parameters() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: 
migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [49/50] b55f7afe868e117d4212f1518b9a37514cc99b33 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 09d6c9658474e8573c5ada58dca8b20fe47dd99e) -Signed-off-by: Peter Xu ---- - migration/migration.c | 420 ------------------------------------------ - migration/options.c | 418 +++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 11 ++ - 3 files changed, 429 insertions(+), 420 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 22ef83c619..08f87f2b0e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -67,19 +67,10 @@ - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - --/* Amount of time to allocate to each "chunk" of bandwidth-throttled -- * data. */ --#define BUFFER_DELAY 100 --#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) -- - /* Time in milliseconds we are allowed to stop the source, - * for sending the last part */ - #define DEFAULT_MIGRATE_SET_DOWNTIME 300 - --/* Maximum migrate downtime set to 2000 seconds */ --#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 --#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) -- - /* Default compression thread count */ - #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 - /* Default decompression thread count, usually decompression is at -@@ -1140,417 +1131,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) - return info; - } - --/* -- * Check whether the parameters are valid. Error will be put into errp -- * (if provided). Return true if valid, otherwise false. 
-- */ --static bool migrate_params_check(MigrationParameters *params, Error **errp) --{ -- if (params->has_compress_level && -- (params->compress_level > 9)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", -- "a value between 0 and 9"); -- return false; -- } -- -- if (params->has_compress_threads && (params->compress_threads < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "compress_threads", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_decompress_threads && (params->decompress_threads < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "decompress_threads", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_throttle_trigger_threshold && -- (params->throttle_trigger_threshold < 1 || -- params->throttle_trigger_threshold > 100)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "throttle_trigger_threshold", -- "an integer in the range of 1 to 100"); -- return false; -- } -- -- if (params->has_cpu_throttle_initial && -- (params->cpu_throttle_initial < 1 || -- params->cpu_throttle_initial > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "cpu_throttle_initial", -- "an integer in the range of 1 to 99"); -- return false; -- } -- -- if (params->has_cpu_throttle_increment && -- (params->cpu_throttle_increment < 1 || -- params->cpu_throttle_increment > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "cpu_throttle_increment", -- "an integer in the range of 1 to 99"); -- return false; -- } -- -- if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "max_bandwidth", -- "an integer in the range of 0 to "stringify(SIZE_MAX) -- " bytes/second"); -- return false; -- } -- -- if (params->has_downtime_limit && -- (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "downtime_limit", -- "an integer in the range of 0 to " -- 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); -- return false; -- } -- -- /* x_checkpoint_delay is now always positive */ -- -- if (params->has_multifd_channels && (params->multifd_channels < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "multifd_channels", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_multifd_zlib_level && -- (params->multifd_zlib_level > 9)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", -- "a value between 0 and 9"); -- return false; -- } -- -- if (params->has_multifd_zstd_level && -- (params->multifd_zstd_level > 20)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", -- "a value between 0 and 20"); -- return false; -- } -- -- if (params->has_xbzrle_cache_size && -- (params->xbzrle_cache_size < qemu_target_page_size() || -- !is_power_of_2(params->xbzrle_cache_size))) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "xbzrle_cache_size", -- "a power of two no less than the target page size"); -- return false; -- } -- -- if (params->has_max_cpu_throttle && -- (params->max_cpu_throttle < params->cpu_throttle_initial || -- params->max_cpu_throttle > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "max_cpu_throttle", -- "an integer in the range of cpu_throttle_initial to 99"); -- return false; -- } -- -- if (params->has_announce_initial && -- params->announce_initial > 100000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_initial", -- "a value between 0 and 100000"); -- return false; -- } -- if (params->has_announce_max && -- params->announce_max > 100000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_max", -- "a value between 0 and 100000"); -- return false; -- } -- if (params->has_announce_rounds && -- params->announce_rounds > 1000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_rounds", -- "a value between 0 and 1000"); -- return false; -- } -- if (params->has_announce_step && -- 
(params->announce_step < 1 || -- params->announce_step > 10000)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_step", -- "a value between 0 and 10000"); -- return false; -- } -- -- if (params->has_block_bitmap_mapping && -- !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { -- error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); -- return false; -- } -- --#ifdef CONFIG_LINUX -- if (migrate_zero_copy_send() && -- ((params->has_multifd_compression && params->multifd_compression) || -- (params->tls_creds && *params->tls_creds))) { -- error_setg(errp, -- "Zero copy only available for non-compressed non-TLS multifd migration"); -- return false; -- } --#endif -- -- return true; --} -- --static void migrate_params_test_apply(MigrateSetParameters *params, -- MigrationParameters *dest) --{ -- *dest = migrate_get_current()->parameters; -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- -- if (params->has_compress_level) { -- dest->compress_level = params->compress_level; -- } -- -- if (params->has_compress_threads) { -- dest->compress_threads = params->compress_threads; -- } -- -- if (params->has_compress_wait_thread) { -- dest->compress_wait_thread = params->compress_wait_thread; -- } -- -- if (params->has_decompress_threads) { -- dest->decompress_threads = params->decompress_threads; -- } -- -- if (params->has_throttle_trigger_threshold) { -- dest->throttle_trigger_threshold = params->throttle_trigger_threshold; -- } -- -- if (params->has_cpu_throttle_initial) { -- dest->cpu_throttle_initial = params->cpu_throttle_initial; -- } -- -- if (params->has_cpu_throttle_increment) { -- dest->cpu_throttle_increment = params->cpu_throttle_increment; -- } -- -- if (params->has_cpu_throttle_tailslow) { -- dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; -- } -- -- if (params->tls_creds) { -- assert(params->tls_creds->type == QTYPE_QSTRING); -- dest->tls_creds = params->tls_creds->u.s; -- } -- 
-- if (params->tls_hostname) { -- assert(params->tls_hostname->type == QTYPE_QSTRING); -- dest->tls_hostname = params->tls_hostname->u.s; -- } -- -- if (params->has_max_bandwidth) { -- dest->max_bandwidth = params->max_bandwidth; -- } -- -- if (params->has_downtime_limit) { -- dest->downtime_limit = params->downtime_limit; -- } -- -- if (params->has_x_checkpoint_delay) { -- dest->x_checkpoint_delay = params->x_checkpoint_delay; -- } -- -- if (params->has_block_incremental) { -- dest->block_incremental = params->block_incremental; -- } -- if (params->has_multifd_channels) { -- dest->multifd_channels = params->multifd_channels; -- } -- if (params->has_multifd_compression) { -- dest->multifd_compression = params->multifd_compression; -- } -- if (params->has_xbzrle_cache_size) { -- dest->xbzrle_cache_size = params->xbzrle_cache_size; -- } -- if (params->has_max_postcopy_bandwidth) { -- dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; -- } -- if (params->has_max_cpu_throttle) { -- dest->max_cpu_throttle = params->max_cpu_throttle; -- } -- if (params->has_announce_initial) { -- dest->announce_initial = params->announce_initial; -- } -- if (params->has_announce_max) { -- dest->announce_max = params->announce_max; -- } -- if (params->has_announce_rounds) { -- dest->announce_rounds = params->announce_rounds; -- } -- if (params->has_announce_step) { -- dest->announce_step = params->announce_step; -- } -- -- if (params->has_block_bitmap_mapping) { -- dest->has_block_bitmap_mapping = true; -- dest->block_bitmap_mapping = params->block_bitmap_mapping; -- } --} -- --static void migrate_params_apply(MigrateSetParameters *params, Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- -- if (params->has_compress_level) { -- s->parameters.compress_level = params->compress_level; -- } -- -- if (params->has_compress_threads) { -- s->parameters.compress_threads = params->compress_threads; -- } 
-- -- if (params->has_compress_wait_thread) { -- s->parameters.compress_wait_thread = params->compress_wait_thread; -- } -- -- if (params->has_decompress_threads) { -- s->parameters.decompress_threads = params->decompress_threads; -- } -- -- if (params->has_throttle_trigger_threshold) { -- s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; -- } -- -- if (params->has_cpu_throttle_initial) { -- s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; -- } -- -- if (params->has_cpu_throttle_increment) { -- s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; -- } -- -- if (params->has_cpu_throttle_tailslow) { -- s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; -- } -- -- if (params->tls_creds) { -- g_free(s->parameters.tls_creds); -- assert(params->tls_creds->type == QTYPE_QSTRING); -- s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); -- } -- -- if (params->tls_hostname) { -- g_free(s->parameters.tls_hostname); -- assert(params->tls_hostname->type == QTYPE_QSTRING); -- s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); -- } -- -- if (params->tls_authz) { -- g_free(s->parameters.tls_authz); -- assert(params->tls_authz->type == QTYPE_QSTRING); -- s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); -- } -- -- if (params->has_max_bandwidth) { -- s->parameters.max_bandwidth = params->max_bandwidth; -- if (s->to_dst_file && !migration_in_postcopy()) { -- qemu_file_set_rate_limit(s->to_dst_file, -- s->parameters.max_bandwidth / XFER_LIMIT_RATIO); -- } -- } -- -- if (params->has_downtime_limit) { -- s->parameters.downtime_limit = params->downtime_limit; -- } -- -- if (params->has_x_checkpoint_delay) { -- s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; -- if (migration_in_colo_state()) { -- colo_checkpoint_notify(s); -- } -- } -- -- if (params->has_block_incremental) { -- s->parameters.block_incremental = params->block_incremental; -- } -- if 
(params->has_multifd_channels) { -- s->parameters.multifd_channels = params->multifd_channels; -- } -- if (params->has_multifd_compression) { -- s->parameters.multifd_compression = params->multifd_compression; -- } -- if (params->has_xbzrle_cache_size) { -- s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; -- xbzrle_cache_resize(params->xbzrle_cache_size, errp); -- } -- if (params->has_max_postcopy_bandwidth) { -- s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; -- if (s->to_dst_file && migration_in_postcopy()) { -- qemu_file_set_rate_limit(s->to_dst_file, -- s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); -- } -- } -- if (params->has_max_cpu_throttle) { -- s->parameters.max_cpu_throttle = params->max_cpu_throttle; -- } -- if (params->has_announce_initial) { -- s->parameters.announce_initial = params->announce_initial; -- } -- if (params->has_announce_max) { -- s->parameters.announce_max = params->announce_max; -- } -- if (params->has_announce_rounds) { -- s->parameters.announce_rounds = params->announce_rounds; -- } -- if (params->has_announce_step) { -- s->parameters.announce_step = params->announce_step; -- } -- -- if (params->has_block_bitmap_mapping) { -- qapi_free_BitmapMigrationNodeAliasList( -- s->parameters.block_bitmap_mapping); -- -- s->parameters.has_block_bitmap_mapping = true; -- s->parameters.block_bitmap_mapping = -- QAPI_CLONE(BitmapMigrationNodeAliasList, -- params->block_bitmap_mapping); -- } --} -- --void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) --{ -- MigrationParameters tmp; -- -- /* TODO Rewrite "" to null instead */ -- if (params->tls_creds -- && params->tls_creds->type == QTYPE_QNULL) { -- qobject_unref(params->tls_creds->u.n); -- params->tls_creds->type = QTYPE_QSTRING; -- params->tls_creds->u.s = strdup(""); -- } -- /* TODO Rewrite "" to null instead */ -- if (params->tls_hostname -- && params->tls_hostname->type == QTYPE_QNULL) { -- 
qobject_unref(params->tls_hostname->u.n); -- params->tls_hostname->type = QTYPE_QSTRING; -- params->tls_hostname->u.s = strdup(""); -- } -- -- migrate_params_test_apply(params, &tmp); -- -- if (!migrate_params_check(&tmp, errp)) { -- /* Invalid parameter */ -- return; -- } -- -- migrate_params_apply(params, errp); --} -- -- - void qmp_migrate_start_postcopy(Error **errp) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.c b/migration/options.c -index d4c0714683..4701c75a4d 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,17 +12,25 @@ - */ - - #include "qemu/osdep.h" -+#include "exec/target_page.h" - #include "qapi/clone-visitor.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" - #include "qapi/qapi-visit-migration.h" - #include "qapi/qmp/qerror.h" -+#include "qapi/qmp/qnull.h" - #include "sysemu/runstate.h" -+#include "migration/colo.h" - #include "migration/misc.h" - #include "migration.h" -+#include "qemu-file.h" - #include "ram.h" - #include "options.h" - -+/* Maximum migrate downtime set to 2000 seconds */ -+#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 -+#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) -+ - bool migrate_auto_converge(void) - { - MigrationState *s; -@@ -729,3 +737,413 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - - return params; - } -+ -+/* -+ * Check whether the parameters are valid. Error will be put into errp -+ * (if provided). Return true if valid, otherwise false. 
-+ */ -+bool migrate_params_check(MigrationParameters *params, Error **errp) -+{ -+ if (params->has_compress_level && -+ (params->compress_level > 9)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", -+ "a value between 0 and 9"); -+ return false; -+ } -+ -+ if (params->has_compress_threads && (params->compress_threads < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "compress_threads", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_decompress_threads && (params->decompress_threads < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "decompress_threads", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_throttle_trigger_threshold && -+ (params->throttle_trigger_threshold < 1 || -+ params->throttle_trigger_threshold > 100)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "throttle_trigger_threshold", -+ "an integer in the range of 1 to 100"); -+ return false; -+ } -+ -+ if (params->has_cpu_throttle_initial && -+ (params->cpu_throttle_initial < 1 || -+ params->cpu_throttle_initial > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "cpu_throttle_initial", -+ "an integer in the range of 1 to 99"); -+ return false; -+ } -+ -+ if (params->has_cpu_throttle_increment && -+ (params->cpu_throttle_increment < 1 || -+ params->cpu_throttle_increment > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "cpu_throttle_increment", -+ "an integer in the range of 1 to 99"); -+ return false; -+ } -+ -+ if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "max_bandwidth", -+ "an integer in the range of 0 to "stringify(SIZE_MAX) -+ " bytes/second"); -+ return false; -+ } -+ -+ if (params->has_downtime_limit && -+ (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "downtime_limit", -+ "an integer in the range of 0 to " -+ 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); -+ return false; -+ } -+ -+ /* x_checkpoint_delay is now always positive */ -+ -+ if (params->has_multifd_channels && (params->multifd_channels < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "multifd_channels", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_multifd_zlib_level && -+ (params->multifd_zlib_level > 9)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", -+ "a value between 0 and 9"); -+ return false; -+ } -+ -+ if (params->has_multifd_zstd_level && -+ (params->multifd_zstd_level > 20)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", -+ "a value between 0 and 20"); -+ return false; -+ } -+ -+ if (params->has_xbzrle_cache_size && -+ (params->xbzrle_cache_size < qemu_target_page_size() || -+ !is_power_of_2(params->xbzrle_cache_size))) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "xbzrle_cache_size", -+ "a power of two no less than the target page size"); -+ return false; -+ } -+ -+ if (params->has_max_cpu_throttle && -+ (params->max_cpu_throttle < params->cpu_throttle_initial || -+ params->max_cpu_throttle > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "max_cpu_throttle", -+ "an integer in the range of cpu_throttle_initial to 99"); -+ return false; -+ } -+ -+ if (params->has_announce_initial && -+ params->announce_initial > 100000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_initial", -+ "a value between 0 and 100000"); -+ return false; -+ } -+ if (params->has_announce_max && -+ params->announce_max > 100000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_max", -+ "a value between 0 and 100000"); -+ return false; -+ } -+ if (params->has_announce_rounds && -+ params->announce_rounds > 1000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_rounds", -+ "a value between 0 and 1000"); -+ return false; -+ } -+ if (params->has_announce_step && -+ 
(params->announce_step < 1 || -+ params->announce_step > 10000)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_step", -+ "a value between 0 and 10000"); -+ return false; -+ } -+ -+ if (params->has_block_bitmap_mapping && -+ !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { -+ error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); -+ return false; -+ } -+ -+#ifdef CONFIG_LINUX -+ if (migrate_zero_copy_send() && -+ ((params->has_multifd_compression && params->multifd_compression) || -+ (params->tls_creds && *params->tls_creds))) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#endif -+ -+ return true; -+} -+ -+static void migrate_params_test_apply(MigrateSetParameters *params, -+ MigrationParameters *dest) -+{ -+ *dest = migrate_get_current()->parameters; -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ -+ if (params->has_compress_level) { -+ dest->compress_level = params->compress_level; -+ } -+ -+ if (params->has_compress_threads) { -+ dest->compress_threads = params->compress_threads; -+ } -+ -+ if (params->has_compress_wait_thread) { -+ dest->compress_wait_thread = params->compress_wait_thread; -+ } -+ -+ if (params->has_decompress_threads) { -+ dest->decompress_threads = params->decompress_threads; -+ } -+ -+ if (params->has_throttle_trigger_threshold) { -+ dest->throttle_trigger_threshold = params->throttle_trigger_threshold; -+ } -+ -+ if (params->has_cpu_throttle_initial) { -+ dest->cpu_throttle_initial = params->cpu_throttle_initial; -+ } -+ -+ if (params->has_cpu_throttle_increment) { -+ dest->cpu_throttle_increment = params->cpu_throttle_increment; -+ } -+ -+ if (params->has_cpu_throttle_tailslow) { -+ dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; -+ } -+ -+ if (params->tls_creds) { -+ assert(params->tls_creds->type == QTYPE_QSTRING); -+ dest->tls_creds = params->tls_creds->u.s; -+ } -+ 
-+ if (params->tls_hostname) { -+ assert(params->tls_hostname->type == QTYPE_QSTRING); -+ dest->tls_hostname = params->tls_hostname->u.s; -+ } -+ -+ if (params->has_max_bandwidth) { -+ dest->max_bandwidth = params->max_bandwidth; -+ } -+ -+ if (params->has_downtime_limit) { -+ dest->downtime_limit = params->downtime_limit; -+ } -+ -+ if (params->has_x_checkpoint_delay) { -+ dest->x_checkpoint_delay = params->x_checkpoint_delay; -+ } -+ -+ if (params->has_block_incremental) { -+ dest->block_incremental = params->block_incremental; -+ } -+ if (params->has_multifd_channels) { -+ dest->multifd_channels = params->multifd_channels; -+ } -+ if (params->has_multifd_compression) { -+ dest->multifd_compression = params->multifd_compression; -+ } -+ if (params->has_xbzrle_cache_size) { -+ dest->xbzrle_cache_size = params->xbzrle_cache_size; -+ } -+ if (params->has_max_postcopy_bandwidth) { -+ dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; -+ } -+ if (params->has_max_cpu_throttle) { -+ dest->max_cpu_throttle = params->max_cpu_throttle; -+ } -+ if (params->has_announce_initial) { -+ dest->announce_initial = params->announce_initial; -+ } -+ if (params->has_announce_max) { -+ dest->announce_max = params->announce_max; -+ } -+ if (params->has_announce_rounds) { -+ dest->announce_rounds = params->announce_rounds; -+ } -+ if (params->has_announce_step) { -+ dest->announce_step = params->announce_step; -+ } -+ -+ if (params->has_block_bitmap_mapping) { -+ dest->has_block_bitmap_mapping = true; -+ dest->block_bitmap_mapping = params->block_bitmap_mapping; -+ } -+} -+ -+static void migrate_params_apply(MigrateSetParameters *params, Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ -+ if (params->has_compress_level) { -+ s->parameters.compress_level = params->compress_level; -+ } -+ -+ if (params->has_compress_threads) { -+ s->parameters.compress_threads = params->compress_threads; -+ } 
-+ -+ if (params->has_compress_wait_thread) { -+ s->parameters.compress_wait_thread = params->compress_wait_thread; -+ } -+ -+ if (params->has_decompress_threads) { -+ s->parameters.decompress_threads = params->decompress_threads; -+ } -+ -+ if (params->has_throttle_trigger_threshold) { -+ s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; -+ } -+ -+ if (params->has_cpu_throttle_initial) { -+ s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; -+ } -+ -+ if (params->has_cpu_throttle_increment) { -+ s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; -+ } -+ -+ if (params->has_cpu_throttle_tailslow) { -+ s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; -+ } -+ -+ if (params->tls_creds) { -+ g_free(s->parameters.tls_creds); -+ assert(params->tls_creds->type == QTYPE_QSTRING); -+ s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); -+ } -+ -+ if (params->tls_hostname) { -+ g_free(s->parameters.tls_hostname); -+ assert(params->tls_hostname->type == QTYPE_QSTRING); -+ s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); -+ } -+ -+ if (params->tls_authz) { -+ g_free(s->parameters.tls_authz); -+ assert(params->tls_authz->type == QTYPE_QSTRING); -+ s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); -+ } -+ -+ if (params->has_max_bandwidth) { -+ s->parameters.max_bandwidth = params->max_bandwidth; -+ if (s->to_dst_file && !migration_in_postcopy()) { -+ qemu_file_set_rate_limit(s->to_dst_file, -+ s->parameters.max_bandwidth / XFER_LIMIT_RATIO); -+ } -+ } -+ -+ if (params->has_downtime_limit) { -+ s->parameters.downtime_limit = params->downtime_limit; -+ } -+ -+ if (params->has_x_checkpoint_delay) { -+ s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; -+ if (migration_in_colo_state()) { -+ colo_checkpoint_notify(s); -+ } -+ } -+ -+ if (params->has_block_incremental) { -+ s->parameters.block_incremental = params->block_incremental; -+ } -+ if 
(params->has_multifd_channels) { -+ s->parameters.multifd_channels = params->multifd_channels; -+ } -+ if (params->has_multifd_compression) { -+ s->parameters.multifd_compression = params->multifd_compression; -+ } -+ if (params->has_xbzrle_cache_size) { -+ s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; -+ xbzrle_cache_resize(params->xbzrle_cache_size, errp); -+ } -+ if (params->has_max_postcopy_bandwidth) { -+ s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; -+ if (s->to_dst_file && migration_in_postcopy()) { -+ qemu_file_set_rate_limit(s->to_dst_file, -+ s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); -+ } -+ } -+ if (params->has_max_cpu_throttle) { -+ s->parameters.max_cpu_throttle = params->max_cpu_throttle; -+ } -+ if (params->has_announce_initial) { -+ s->parameters.announce_initial = params->announce_initial; -+ } -+ if (params->has_announce_max) { -+ s->parameters.announce_max = params->announce_max; -+ } -+ if (params->has_announce_rounds) { -+ s->parameters.announce_rounds = params->announce_rounds; -+ } -+ if (params->has_announce_step) { -+ s->parameters.announce_step = params->announce_step; -+ } -+ -+ if (params->has_block_bitmap_mapping) { -+ qapi_free_BitmapMigrationNodeAliasList( -+ s->parameters.block_bitmap_mapping); -+ -+ s->parameters.has_block_bitmap_mapping = true; -+ s->parameters.block_bitmap_mapping = -+ QAPI_CLONE(BitmapMigrationNodeAliasList, -+ params->block_bitmap_mapping); -+ } -+} -+ -+void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) -+{ -+ MigrationParameters tmp; -+ -+ /* TODO Rewrite "" to null instead */ -+ if (params->tls_creds -+ && params->tls_creds->type == QTYPE_QNULL) { -+ qobject_unref(params->tls_creds->u.n); -+ params->tls_creds->type = QTYPE_QSTRING; -+ params->tls_creds->u.s = strdup(""); -+ } -+ /* TODO Rewrite "" to null instead */ -+ if (params->tls_hostname -+ && params->tls_hostname->type == QTYPE_QNULL) { -+ 
qobject_unref(params->tls_hostname->u.n); -+ params->tls_hostname->type = QTYPE_QSTRING; -+ params->tls_hostname->u.s = strdup(""); -+ } -+ -+ migrate_params_test_apply(params, &tmp); -+ -+ if (!migrate_params_check(&tmp, errp)) { -+ /* Invalid parameter */ -+ return; -+ } -+ -+ migrate_params_apply(params, errp); -+} -diff --git a/migration/options.h b/migration/options.h -index 13318a16c7..89067e59a0 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -14,6 +14,13 @@ - #ifndef QEMU_MIGRATION_OPTIONS_H - #define QEMU_MIGRATION_OPTIONS_H - -+/* constants */ -+ -+/* Amount of time to allocate to each "chunk" of bandwidth-throttled -+ * data. */ -+#define BUFFER_DELAY 100 -+#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) -+ - /* capabilities */ - - bool migrate_auto_converge(void); -@@ -74,4 +81,8 @@ int migrate_multifd_zstd_level(void); - uint8_t migrate_throttle_trigger_threshold(void); - uint64_t migrate_xbzrle_cache_size(void); - -+/* parameters helpers */ -+ -+bool migrate_params_check(MigrationParameters *params, Error **errp); -+ - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch b/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch deleted file mode 100644 index d2564de..0000000 --- a/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 00cc3c3598828588619a7b3696819060bddaddb8 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:15:59 +0100 -Subject: [PATCH 34/56] migration: Move qmp_query_migrate_capabilities() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [33/50] dbfa8f1e7aa7e000b4622ce2da12d7d418710f19 
(peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 4d0c6b695bf5252402ebf967f83baebfd2f4b91e) -Signed-off-by: Peter Xu ---- - migration/migration.c | 22 ---------------------- - migration/options.c | 23 +++++++++++++++++++++++ - 2 files changed, 23 insertions(+), 22 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d9e30ca918..3dc8ee4875 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -886,28 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) - migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); - } - --MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) --{ -- MigrationCapabilityStatusList *head = NULL, **tail = &head; -- MigrationCapabilityStatus *caps; -- MigrationState *s = migrate_get_current(); -- int i; -- -- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { --#ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (i == MIGRATION_CAPABILITY_BLOCK) { -- continue; -- } --#endif -- caps = g_malloc0(sizeof(*caps)); -- caps->capability = i; -- caps->state = s->capabilities[i]; -- QAPI_LIST_APPEND(tail, caps); -- } -- -- return head; --} -- - MigrationParameters *qmp_query_migrate_parameters(Error **errp) - { - MigrationParameters *params; -diff --git a/migration/options.c b/migration/options.c -index 367c930f46..ff621bdeb3 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -13,6 +13,7 @@ - - #include "qemu/osdep.h" - #include "qapi/error.h" -+#include "qapi/qapi-commands-migration.h" - #include "sysemu/runstate.h" - #include "migration.h" - #include "ram.h" -@@ -390,3 +391,25 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - - return true; - } -+ -+MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) -+{ -+ MigrationCapabilityStatusList *head = NULL, **tail = &head; -+ MigrationCapabilityStatus *caps; -+ 
MigrationState *s = migrate_get_current(); -+ int i; -+ -+ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -+#ifndef CONFIG_LIVE_BLOCK_MIGRATION -+ if (i == MIGRATION_CAPABILITY_BLOCK) { -+ continue; -+ } -+#endif -+ caps = g_malloc0(sizeof(*caps)); -+ caps->capability = i; -+ caps->state = s->capabilities[i]; -+ QAPI_LIST_APPEND(tail, caps); -+ } -+ -+ return head; -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch b/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch deleted file mode 100644 index 7339ce0..0000000 --- a/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch +++ /dev/null @@ -1,226 +0,0 @@ -From 4782b59a8b0b5762f87505ac7a83b37ddd2e0b3f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 20:28:56 +0100 -Subject: [PATCH 19/56] migration: Pass migrate_caps_check() the old and new - caps -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [18/50] df78d680d03f15d7cb7401ad89e68a4fc93fa835 (peterx/qemu-kvm) - -We used to pass the old capabilities array and the new -capabilities as a list. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b02c7fc9ef447787414e6fa67eff75e7b7b30180) -Signed-off-by: Peter Xu ---- - migration/migration.c | 80 +++++++++++++++++-------------------------- - 1 file changed, 31 insertions(+), 49 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d8e5fb6226..e8f596bcfa 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1299,30 +1299,20 @@ WriteTrackingSupport migrate_query_write_tracking(void) - } - - /** -- * @migration_caps_check - check capability validity -+ * @migration_caps_check - check capability compatibility - * -- * @cap_list: old capability list, array of bool -- * @params: new capabilities to be applied soon -+ * @old_caps: old capability list -+ * @new_caps: new capability list - * @errp: set *errp if the check failed, with reason - * - * Returns true if check passed, otherwise false. - */ --static bool migrate_caps_check(bool *cap_list, -- MigrationCapabilityStatusList *params, -- Error **errp) -+static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - { -- MigrationCapabilityStatusList *cap; -- bool old_postcopy_cap; - MigrationIncomingState *mis = migration_incoming_get_current(); - -- old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -- -- for (cap = params; cap; cap = cap->next) { -- cap_list[cap->value->capability] = cap->value->state; -- } -- - #ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { -+ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { - error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " - "block migration"); - error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -@@ -1331,7 +1321,7 @@ static bool migrate_caps_check(bool *cap_list, - #endif - - #ifndef CONFIG_REPLICATION -- if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { -+ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { - error_setg(errp, "QEMU compiled without 
replication module" - " can't enable COLO"); - error_append_hint(errp, "Please enable replication before COLO.\n"); -@@ -1339,12 +1329,13 @@ static bool migrate_caps_check(bool *cap_list, - } - #endif - -- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - /* This check is reasonably expensive, so only when it's being - * set the first time, also it's only the destination that needs - * special support. - */ -- if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && -+ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -+ runstate_check(RUN_STATE_INMIGRATE) && - !postcopy_ram_supported_by_host(mis)) { - /* postcopy_ram_supported_by_host will have emitted a more - * detailed message -@@ -1353,13 +1344,13 @@ static bool migrate_caps_check(bool *cap_list, - return false; - } - -- if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -+ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { - error_setg(errp, "Postcopy is not compatible with ignore-shared"); - return false; - } - } - -- if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -+ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { - WriteTrackingSupport wt_support; - int idx; - /* -@@ -1383,7 +1374,7 @@ static bool migrate_caps_check(bool *cap_list, - */ - for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { - int incomp_cap = check_caps_background_snapshot.caps[idx]; -- if (cap_list[incomp_cap]) { -+ if (new_caps[incomp_cap]) { - error_setg(errp, - "Background-snapshot is not compatible with %s", - MigrationCapability_str(incomp_cap)); -@@ -1393,10 +1384,10 @@ static bool migrate_caps_check(bool *cap_list, - } - - #ifdef CONFIG_LINUX -- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -- (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || -- cap_list[MIGRATION_CAPABILITY_COMPRESS] || -- cap_list[MIGRATION_CAPABILITY_XBZRLE] || -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -+ 
(!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -+ new_caps[MIGRATION_CAPABILITY_COMPRESS] || -+ new_caps[MIGRATION_CAPABILITY_XBZRLE] || - migrate_multifd_compression() || - migrate_use_tls())) { - error_setg(errp, -@@ -1404,15 +1395,15 @@ static bool migrate_caps_check(bool *cap_list, - return false; - } - #else -- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { - error_setg(errp, - "Zero copy currently only available on Linux"); - return false; - } - #endif - -- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -- if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -+ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - error_setg(errp, "Postcopy preempt requires postcopy-ram"); - return false; - } -@@ -1423,14 +1414,14 @@ static bool migrate_caps_check(bool *cap_list, - * different compression channels, which is not compatible with the - * preempt assumptions on channel assignments. 
- */ -- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { - error_setg(errp, "Postcopy preempt not compatible with compress"); - return false; - } - } - -- if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) { -- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { -+ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { - error_setg(errp, "Multifd is not compatible with compress"); - return false; - } -@@ -1486,15 +1477,19 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - { - MigrationState *s = migrate_get_current(); - MigrationCapabilityStatusList *cap; -- bool cap_list[MIGRATION_CAPABILITY__MAX]; -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; - - if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } - -- memcpy(cap_list, s->capabilities, sizeof(cap_list)); -- if (!migrate_caps_check(cap_list, params, errp)) { -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ for (cap = params; cap; cap = cap->next) { -+ new_caps[cap->value->capability] = cap->value->state; -+ } -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { - return; - } - -@@ -4634,27 +4629,14 @@ static void migration_instance_init(Object *obj) - */ - static bool migration_object_check(MigrationState *ms, Error **errp) - { -- MigrationCapabilityStatusList *head = NULL; - /* Assuming all off */ -- bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; -- int i; -+ bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 }; - - if (!migrate_params_check(&ms->parameters, errp)) { - return false; - } - -- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (ms->capabilities[i]) { -- QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); -- } -- } -- -- ret = migrate_caps_check(cap_list, head, errp); -- -- /* It works with head == NULL */ -- qapi_free_MigrationCapabilityStatusList(head); -- -- return ret; -+ return migrate_caps_check(old_caps, 
ms->capabilities, errp); - } - - static const TypeInfo migration_type = { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch b/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch deleted file mode 100644 index 22acab5..0000000 --- a/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 3cecf66655a0dd599666bcac8add2dee85d5651f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 19 Apr 2023 18:16:05 +0200 -Subject: [PATCH 16/56] migration: Rename duplicate to zero_pages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [15/50] 89db3c8b167c0f411ba95ce2730540c0e8f1206b (peterx/qemu-kvm) - -Rest of counters that refer to pages has a _pages suffix. -And historically, this showed the number of pages composed of the same -character, here comes the name "duplicated". But since years ago, it -refers to the number of zero_pages. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 1a386e8de5995fb5478ea99baa6d3e71abcf4b80) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 10 +++++----- - migration/ram.h | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 39501a0ed8..c15e2a61ca 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1142,7 +1142,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram = g_malloc0(sizeof(*info->ram)); - info->ram->transferred = stat64_get(&ram_counters.transferred); - info->ram->total = ram_bytes_total(); -- info->ram->duplicate = stat64_get(&ram_counters.duplicate); -+ info->ram->duplicate = stat64_get(&ram_counters.zero_pages); - /* legacy value. It is not used anymore */ - info->ram->skipped = 0; - info->ram->normal = stat64_get(&ram_counters.normal); -diff --git a/migration/ram.c b/migration/ram.c -index fe69ecaef4..19d345a030 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1119,7 +1119,7 @@ uint64_t ram_pagesize_summary(void) - uint64_t ram_get_total_transferred_pages(void) - { - return stat64_get(&ram_counters.normal) + -- stat64_get(&ram_counters.duplicate) + -+ stat64_get(&ram_counters.zero_pages) + - compression_counters.pages + xbzrle_counters.pages; - } - -@@ -1320,7 +1320,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, - int len = save_zero_page_to_file(pss, f, block, offset); - - if (len) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - ram_transferred_add(len); - return 1; - } -@@ -1359,7 +1359,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - if (bytes_xmit > 0) { - stat64_add(&ram_counters.normal, 1); - } else if (bytes_xmit == 0) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - } - - return true; -@@ -1486,7 
+1486,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) - ram_transferred_add(bytes_xmit); - - if (param->zero_page) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - return; - } - -@@ -2621,7 +2621,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - uint64_t pages = size / TARGET_PAGE_SIZE; - - if (zero) { -- stat64_add(&ram_counters.duplicate, pages); -+ stat64_add(&ram_counters.zero_pages, pages); - } else { - stat64_add(&ram_counters.normal, pages); - ram_transferred_add(size); -diff --git a/migration/ram.h b/migration/ram.h -index afa68521d7..55258334fe 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -45,7 +45,7 @@ typedef struct { - Stat64 dirty_sync_count; - Stat64 dirty_sync_missed_zero_copy; - Stat64 downtime_bytes; -- Stat64 duplicate; -+ Stat64 zero_pages; - Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch b/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch deleted file mode 100644 index 8ad6447..0000000 --- a/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 7e27e7ea83856e1a7222ff46d91495f48fb6be4d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 19 Apr 2023 18:19:45 +0200 -Subject: [PATCH 17/56] migration: Rename normal to normal_pages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [16/50] 7df8b946918def9657bbe357861a6d72b5399ac6 (peterx/qemu-kvm) - -Rest of counters that refer to pages has a _pages suffix. -And historically, this showed the number of full pages transferred. 
-The name "normal" refered to the fact that they were sent without any -optimization (compression, xbzrle, zero_page, ...). - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 8c0cda8fa0de0a50148e2c60552afca9cffca643) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 10 +++++----- - migration/ram.h | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c15e2a61ca..f1b3439e5f 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1145,7 +1145,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->duplicate = stat64_get(&ram_counters.zero_pages); - /* legacy value. It is not used anymore */ - info->ram->skipped = 0; -- info->ram->normal = stat64_get(&ram_counters.normal); -+ info->ram->normal = stat64_get(&ram_counters.normal_pages); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = -diff --git a/migration/ram.c b/migration/ram.c -index 19d345a030..229714045a 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1118,7 +1118,7 @@ uint64_t ram_pagesize_summary(void) - - uint64_t ram_get_total_transferred_pages(void) - { -- return stat64_get(&ram_counters.normal) + -+ return stat64_get(&ram_counters.normal_pages) + - stat64_get(&ram_counters.zero_pages) + - compression_counters.pages + xbzrle_counters.pages; - } -@@ -1357,7 +1357,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - } - - if (bytes_xmit > 0) { -- stat64_add(&ram_counters.normal, 1); -+ stat64_add(&ram_counters.normal_pages, 1); - } else if (bytes_xmit == 0) { - stat64_add(&ram_counters.zero_pages, 1); - } -@@ -1391,7 +1391,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, - qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); - } - ram_transferred_add(TARGET_PAGE_SIZE); -- stat64_add(&ram_counters.normal, 1); -+ 
stat64_add(&ram_counters.normal_pages, 1); - return 1; - } - -@@ -1447,7 +1447,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, - if (multifd_queue_page(file, block, offset) < 0) { - return -1; - } -- stat64_add(&ram_counters.normal, 1); -+ stat64_add(&ram_counters.normal_pages, 1); - - return 1; - } -@@ -2623,7 +2623,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - if (zero) { - stat64_add(&ram_counters.zero_pages, pages); - } else { -- stat64_add(&ram_counters.normal, pages); -+ stat64_add(&ram_counters.normal_pages, pages); - ram_transferred_add(size); - qemu_file_credit_transfer(f, size); - } -diff --git a/migration/ram.h b/migration/ram.h -index 55258334fe..a6e0d70226 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -47,7 +47,7 @@ typedef struct { - Stat64 downtime_bytes; - Stat64 zero_pages; - Stat64 multifd_bytes; -- Stat64 normal; -+ Stat64 normal_pages; - Stat64 postcopy_bytes; - Stat64 postcopy_requests; - Stat64 precopy_bytes; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch b/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch deleted file mode 100644 index 7e78d82..0000000 --- a/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch +++ /dev/null @@ -1,52 +0,0 @@ -From c0d377e1bf442a09b82fddbb8588fcddf6439854 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 24 Nov 2022 17:26:19 +0100 -Subject: [PATCH 09/56] migration: Update atomic stats out of the mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [8/50] 88e9dbc9a3e5aef60a7c98c871144904c7062b1f (peterx/qemu-kvm) - -Reviewed-by: David Edmondson -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela 
-(cherry picked from commit 30fb22cda45bea43a3c0e26049ebdd71a9503ffd) -Signed-off-by: Peter Xu ---- - migration/multifd.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index 01fab01a92..6ef3a27938 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -433,8 +433,8 @@ static int multifd_send_pages(QEMUFile *f) - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); - ram_counters.multifd_bytes += transferred; -- stat64_add(&ram_counters.transferred, transferred); - qemu_mutex_unlock(&p->mutex); -+ stat64_add(&ram_counters.transferred, transferred); - qemu_sem_post(&p->sem); - - return 1; -@@ -628,8 +628,8 @@ int multifd_send_sync_main(QEMUFile *f) - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); - ram_counters.multifd_bytes += p->packet_len; -- stat64_add(&ram_counters.transferred, p->packet_len); - qemu_mutex_unlock(&p->mutex); -+ stat64_add(&ram_counters.transferred, p->packet_len); - qemu_sem_post(&p->sem); - } - for (i = 0; i < migrate_multifd_channels(); i++) { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch b/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch deleted file mode 100644 index f179761..0000000 --- a/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 8d203baa6cbd1f371e308c2c9d59a5ca7d29dca8 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:55:30 +0100 -Subject: [PATCH 38/56] migration: Use migrate_max_postcopy_bandwidth() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [37/50] 
d62948e9ee40a85ed9b460a583c3b0e43cd5d47f (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5390adec03a7d8bc6bcf5887f726b0ddaeb90681) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 7f2e770deb..78bca9a93f 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3799,7 +3799,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - - if (resume) { - /* This is a resumed migration */ -- rate_limit = s->parameters.max_postcopy_bandwidth / -+ rate_limit = migrate_max_postcopy_bandwidth() / - XFER_LIMIT_RATIO; - } else { - /* This is a fresh new migration */ --- -2.39.1 - diff --git a/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch b/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch deleted file mode 100644 index 9451696..0000000 --- a/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch +++ /dev/null @@ -1,153 +0,0 @@ -From cfdf5715a2334ad06b5966ec986d134bbd5ba08b Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Dec 2022 12:48:16 +0100 -Subject: [PATCH 05/56] migration: mark mixed functions that can suspend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [4/50] 9f055b526edd06a3440999d5de91e5d624678c7d (peterx/qemu-kvm) - -There should be no paths from a coroutine_fn to aio_poll, however in -practice coroutine_mixed_fn will call aio_poll in the !qemu_in_coroutine() -path. 
By marking mixed functions, we can track accurately the call paths -that execute entirely in coroutine context, and find more missing -coroutine_fn markers. This results in more accurate checks that -coroutine code does not end up blocking. - -If the marking were extended transitively to all functions that call -these ones, static analysis could be done much more efficiently. -However, this is a start and makes it possible to use vrc's path-based -searches to find potential bugs where coroutine_fns call blocking functions. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 394b9407e4c515f96df6647d629ee28cbb86f07c) -Signed-off-by: Peter Xu ---- - include/migration/qemu-file-types.h | 4 ++-- - migration/qemu-file.c | 14 +++++++------- - migration/qemu-file.h | 6 +++--- - 3 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h -index 2867e3da84..1436f9ce92 100644 ---- a/include/migration/qemu-file-types.h -+++ b/include/migration/qemu-file-types.h -@@ -35,7 +35,7 @@ void qemu_put_byte(QEMUFile *f, int v); - void qemu_put_be16(QEMUFile *f, unsigned int v); - void qemu_put_be32(QEMUFile *f, unsigned int v); - void qemu_put_be64(QEMUFile *f, uint64_t v); --size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); -+size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); - - int qemu_get_byte(QEMUFile *f); - -@@ -161,7 +161,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) - qemu_get_be64s(f, (uint64_t *)pv); - } - --size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); -+size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]); - - void qemu_put_counted_string(QEMUFile *f, const char *name); - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 102ab3b439..ee04240a21 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -392,7 +392,7 @@ size_t ram_control_save_page(QEMUFile 
*f, ram_addr_t block_offset, - * case if the underlying file descriptor gives a short read, and that can - * happen even on a blocking fd. - */ --static ssize_t qemu_fill_buffer(QEMUFile *f) -+static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) - { - int len; - int pending; -@@ -585,7 +585,7 @@ void qemu_file_skip(QEMUFile *f, int size) - * return as many as it managed to read (assuming blocking fd's which - * all current QEMUFile are) - */ --size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) -+size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) - { - ssize_t pending; - size_t index; -@@ -633,7 +633,7 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) - * return as many as it managed to read (assuming blocking fd's which - * all current QEMUFile are) - */ --size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) -+size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) - { - size_t pending = size; - size_t done = 0; -@@ -674,7 +674,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) - * Note: Since **buf may get changed, the caller should take care to - * keep a pointer to the original buffer if it needs to deallocate it. - */ --size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) -+size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) - { - if (size < IO_BUF_SIZE) { - size_t res; -@@ -696,7 +696,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) - * Peeks a single byte from the buffer; this isn't guaranteed to work if - * offset leaves a gap after the previous read/peeked data. 
- */ --int qemu_peek_byte(QEMUFile *f, int offset) -+int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset) - { - int index = f->buf_index + offset; - -@@ -713,7 +713,7 @@ int qemu_peek_byte(QEMUFile *f, int offset) - return f->buf[index]; - } - --int qemu_get_byte(QEMUFile *f) -+int coroutine_mixed_fn qemu_get_byte(QEMUFile *f) - { - int result; - -@@ -894,7 +894,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) - * else 0 - * (Note a 0 length string will return 0 either way) - */ --size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) -+size_t coroutine_fn qemu_get_counted_string(QEMUFile *f, char buf[256]) - { - size_t len = qemu_get_byte(f); - size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); -diff --git a/migration/qemu-file.h b/migration/qemu-file.h -index 9d0155a2a1..d16cd50448 100644 ---- a/migration/qemu-file.h -+++ b/migration/qemu-file.h -@@ -108,8 +108,8 @@ bool qemu_file_is_writable(QEMUFile *f); - - #include "migration/qemu-file-types.h" - --size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); --size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); -+size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); -+size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); - ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, - const uint8_t *p, size_t size); - int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); -@@ -119,7 +119,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); - * is; you aren't guaranteed to be able to peak to +n bytes unless you've - * previously peeked +n-1. 
- */ --int qemu_peek_byte(QEMUFile *f, int offset); -+int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); - void qemu_file_skip(QEMUFile *f, int size); - /* - * qemu_file_credit_transfer: --- -2.39.1 - diff --git a/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch b/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch deleted file mode 100644 index 4e73c80..0000000 --- a/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 96e6914cbfb18bb8287c57b9ac9a6b364d3e7a22 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 22 Feb 2023 17:18:05 +0100 -Subject: [PATCH 20/56] migration: move migration_global_dump() to - migration-hmp-cmds.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [19/50] c8d330a2833c706b9bd78f7154be882e3977ad06 (peterx/qemu-kvm) - -It is only used there, so we can make it static. -Once there, remove spice.h that it is not used. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -Reviewed-by: Philippe Mathieu-Daudé - ---- - -fix David Edmonson ui/qemu-spice.h unintended removal - -(cherry picked from commit c938157713e723165a42cb6e8364adb6fcbd0e22) -Signed-off-by: Peter Xu ---- - include/migration/misc.h | 1 - - migration/migration-hmp-cmds.c | 22 +++++++++++++++++++++- - migration/migration.c | 19 ------------------- - 3 files changed, 21 insertions(+), 21 deletions(-) - -diff --git a/include/migration/misc.h b/include/migration/misc.h -index 8b49841016..5ebe13b4b9 100644 ---- a/include/migration/misc.h -+++ b/include/migration/misc.h -@@ -66,7 +66,6 @@ bool migration_has_finished(MigrationState *); - bool migration_has_failed(MigrationState *); - /* ...and after the device transmission */ - bool migration_in_postcopy_after_devices(MigrationState *); --void migration_global_dump(Monitor *mon); - /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */ - bool migration_in_incoming_postcopy(void); - /* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */ -diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c -index 72519ea99f..71da91967a 100644 ---- a/migration/migration-hmp-cmds.c -+++ b/migration/migration-hmp-cmds.c -@@ -15,7 +15,6 @@ - - #include "qemu/osdep.h" - #include "block/qapi.h" --#include "migration/misc.h" - #include "migration/snapshot.h" - #include "monitor/hmp.h" - #include "monitor/monitor.h" -@@ -30,6 +29,27 @@ - #include "qemu/sockets.h" - #include "sysemu/runstate.h" - #include "ui/qemu-spice.h" -+#include "sysemu/sysemu.h" -+#include "migration.h" -+ -+static void migration_global_dump(Monitor *mon) -+{ -+ MigrationState *ms = migrate_get_current(); -+ -+ monitor_printf(mon, "globals:\n"); -+ monitor_printf(mon, "store-global-state: %s\n", -+ ms->store_global_state ? "on" : "off"); -+ monitor_printf(mon, "only-migratable: %s\n", -+ only_migratable ? 
"on" : "off"); -+ monitor_printf(mon, "send-configuration: %s\n", -+ ms->send_configuration ? "on" : "off"); -+ monitor_printf(mon, "send-section-footer: %s\n", -+ ms->send_section_footer ? "on" : "off"); -+ monitor_printf(mon, "decompress-error-check: %s\n", -+ ms->decompress_error_check ? "on" : "off"); -+ monitor_printf(mon, "clear-bitmap-shift: %u\n", -+ ms->clear_bitmap_shift); -+} - - void hmp_info_migrate(Monitor *mon, const QDict *qdict) - { -diff --git a/migration/migration.c b/migration/migration.c -index e8f596bcfa..aa96ffdc5b 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -4420,25 +4420,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - s->migration_thread_running = true; - } - --void migration_global_dump(Monitor *mon) --{ -- MigrationState *ms = migrate_get_current(); -- -- monitor_printf(mon, "globals:\n"); -- monitor_printf(mon, "store-global-state: %s\n", -- ms->store_global_state ? "on" : "off"); -- monitor_printf(mon, "only-migratable: %s\n", -- only_migratable ? "on" : "off"); -- monitor_printf(mon, "send-configuration: %s\n", -- ms->send_configuration ? "on" : "off"); -- monitor_printf(mon, "send-section-footer: %s\n", -- ms->send_section_footer ? "on" : "off"); -- monitor_printf(mon, "decompress-error-check: %s\n", -- ms->decompress_error_check ? 
"on" : "off"); -- monitor_printf(mon, "clear-bitmap-shift: %u\n", -- ms->clear_bitmap_shift); --} -- - #define DEFINE_PROP_MIG_CAP(name, x) \ - DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch b/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch deleted file mode 100644 index 7700466..0000000 --- a/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 4827d5be5357ab89e0c46f606ad828bf97d36471 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:38 -0400 -Subject: [PATCH 04/56] migration/postcopy: Detect file system on dest host -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [3/50] 121aeeda8a019f79dba6c077c7018bd1c86f3d71 (peterx/qemu-kvm) - -Postcopy requires the memory support userfaultfd to work. Right now we -check it but it's a bit too late (when switching to postcopy migration). - -Do that early right at enabling of postcopy. - -Note that this is still only a best effort because ramblocks can be -dynamically created. We can add check in hostmem creations and fail if -postcopy enabled, but maybe that's too aggressive. - -Still, we have chance to fail the most obvious where we know there's an -existing unsupported ramblock. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit ae30b9b2892b85e6c3d5c0b8d1949c4d77a2954a) -Signed-off-by: Peter Xu ---- - migration/postcopy-ram.c | 34 ++++++++++++++++++++++++++++++---- - 1 file changed, 30 insertions(+), 4 deletions(-) - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 93f39f8e06..bbb8af61ae 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -36,6 +36,7 @@ - #include "yank_functions.h" - #include "tls.h" - #include "qemu/userfaultfd.h" -+#include "qemu/mmap-alloc.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -@@ -336,11 +337,12 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - - /* Callback from postcopy_ram_supported_by_host block iterator. - */ --static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) -+static int test_ramblock_postcopiable(RAMBlock *rb) - { - const char *block_name = qemu_ram_get_idstr(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); - size_t pagesize = qemu_ram_pagesize(rb); -+ QemuFsType fs; - - if (length % pagesize) { - error_report("Postcopy requires RAM blocks to be a page size multiple," -@@ -348,6 +350,15 @@ static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) - "page size of 0x%zx", block_name, length, pagesize); - return 1; - } -+ -+ if (rb->fd >= 0) { -+ fs = qemu_fd_getfs(rb->fd); -+ if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { -+ error_report("Host backend files need to be TMPFS or HUGETLBFS only"); -+ return 1; -+ } -+ } -+ - return 0; - } - -@@ -366,6 +377,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - struct uffdio_range range_struct; - uint64_t feature_mask; - Error *local_err = NULL; -+ RAMBlock *block; - - if (qemu_target_page_size() > pagesize) { - error_report("Target page size bigger than host page size"); -@@ -390,9 +402,23 @@ bool 
postcopy_ram_supported_by_host(MigrationIncomingState *mis) - goto out; - } - -- /* We don't support postcopy with shared RAM yet */ -- if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) { -- goto out; -+ /* -+ * We don't support postcopy with some type of ramblocks. -+ * -+ * NOTE: we explicitly ignored ramblock_is_ignored() instead we checked -+ * all possible ramblocks. This is because this function can be called -+ * when creating the migration object, during the phase RAM_MIGRATABLE -+ * is not even properly set for all the ramblocks. -+ * -+ * A side effect of this is we'll also check against RAM_SHARED -+ * ramblocks even if migrate_ignore_shared() is set (in which case -+ * we'll never migrate RAM_SHARED at all), but normally this shouldn't -+ * affect in reality, or we can revisit. -+ */ -+ RAMBLOCK_FOREACH(block) { -+ if (test_ramblock_postcopiable(block)) { -+ goto out; -+ } - } - - /* --- -2.39.1 - diff --git a/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch b/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch deleted file mode 100644 index 88eb791..0000000 --- a/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 93c9a1ae812720d3a29980a3c5fcfc1e916993de Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?=E6=9D=8E=E7=9A=86=E4=BF=8A?= -Date: Fri, 17 Mar 2023 09:57:13 +0000 -Subject: [PATCH 07/56] migration: remove extra whitespace character for code - style -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [6/50] bc1cd812f8dfc18e47e1644b5333c703eae23d2d (peterx/qemu-kvm) - -Fix code style. 
- -Signed-off-by: 李皆俊 -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 8ebb6ecc3798e66a9ba98355983762bedfa1b72d) -Signed-off-by: Peter Xu ---- - migration/ram.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 79d881f735..0e68099bf9 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3293,7 +3293,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - - migration_ops = g_malloc0(sizeof(MigrationOps)); - migration_ops->ram_save_target_page = ram_save_target_page_legacy; -- ret = multifd_send_sync_main(f); -+ ret = multifd_send_sync_main(f); - if (ret < 0) { - return ret; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch b/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch deleted file mode 100644 index 52b19b3..0000000 --- a/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch +++ /dev/null @@ -1,329 +0,0 @@ -From ee566ec12099992f9134bda1db92dd568427245a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 18:26:59 +0100 -Subject: [PATCH 18/56] migration: rename enabled_capabilities to capabilities -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [17/50] 841a27addf273d8f559bc8ebd2c854200e8ca673 (peterx/qemu-kvm) - -It is clear from the context what that means, and such a long name -with the extra long names of the capabilities make very difficilut to -stay inside the 80 columns limit. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 0cec2056ff67557c18d7b8ab1b70ab47c9e31f2f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 52 +++++++++++++++++++++---------------------- - migration/migration.h | 2 +- - migration/rdma.c | 4 ++-- - migration/savevm.c | 6 ++--- - 4 files changed, 31 insertions(+), 33 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f1b3439e5f..d8e5fb6226 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -364,8 +364,7 @@ static bool migrate_late_block_activate(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[ -- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; -+ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - - /* -@@ -944,7 +943,7 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - #endif - caps = g_malloc0(sizeof(*caps)); - caps->capability = i; -- caps->state = s->enabled_capabilities[i]; -+ caps->state = s->capabilities[i]; - QAPI_LIST_APPEND(tail, caps); - } - -@@ -1494,13 +1493,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - return; - } - -- memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); -+ memcpy(cap_list, s->capabilities, sizeof(cap_list)); - if (!migrate_caps_check(cap_list, params, errp)) { - return; - } - - for (cap = params; cap; cap = cap->next) { -- s->enabled_capabilities[cap->value->capability] = cap->value->state; -+ s->capabilities[cap->value->capability] = cap->value->state; - } - } - -@@ -2569,7 +2568,7 @@ bool migrate_release_ram(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; -+ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - - bool migrate_postcopy_ram(void) -@@ -2578,7 +2577,7 @@ bool migrate_postcopy_ram(void) - - s = migrate_get_current(); - -- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } - - bool migrate_postcopy(void) -@@ -2592,7 +2591,7 @@ bool migrate_auto_converge(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; -+ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; - } - - bool migrate_zero_blocks(void) -@@ -2601,7 +2600,7 @@ bool migrate_zero_blocks(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } - - bool migrate_postcopy_blocktime(void) -@@ -2610,7 +2609,7 @@ bool migrate_postcopy_blocktime(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; - } - - bool migrate_use_compression(void) -@@ -2619,7 +2618,7 @@ bool migrate_use_compression(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; -+ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; - } - - int migrate_compress_level(void) -@@ -2664,7 +2663,7 @@ bool migrate_dirty_bitmaps(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; -+ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - - bool migrate_ignore_shared(void) -@@ -2673,7 +2672,7 @@ bool migrate_ignore_shared(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; -+ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; - } - - bool migrate_validate_uuid(void) -@@ -2682,7 +2681,7 @@ bool migrate_validate_uuid(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; -+ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - - bool 
migrate_use_events(void) -@@ -2691,7 +2690,7 @@ bool migrate_use_events(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; -+ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; - } - - bool migrate_use_multifd(void) -@@ -2700,7 +2699,7 @@ bool migrate_use_multifd(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; -+ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - - bool migrate_pause_before_switchover(void) -@@ -2709,8 +2708,7 @@ bool migrate_pause_before_switchover(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[ -- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; -+ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; - } - - int migrate_multifd_channels(void) -@@ -2757,7 +2755,7 @@ bool migrate_use_zero_copy_send(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } - #endif - -@@ -2776,7 +2774,7 @@ int migrate_use_xbzrle(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; -+ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; - } - - uint64_t migrate_xbzrle_cache_size(void) -@@ -2803,7 +2801,7 @@ bool migrate_use_block(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; -+ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; - } - - bool migrate_use_return_path(void) -@@ -2812,7 +2810,7 @@ bool migrate_use_return_path(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; -+ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - - bool migrate_use_block_incremental(void) -@@ -2830,7 +2828,7 @@ bool migrate_background_snapshot(void) - - s = migrate_get_current(); - -- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; -+ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - - bool migrate_postcopy_preempt(void) -@@ -2839,7 +2837,7 @@ bool migrate_postcopy_preempt(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; - } - - /* migration thread support */ -@@ -3584,7 +3582,7 @@ fail: - bool migrate_colo_enabled(void) - { - MigrationState *s = migrate_get_current(); -- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; -+ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - - typedef enum MigThrError { -@@ -4447,7 +4445,7 @@ void migration_global_dump(Monitor *mon) - } - - #define DEFINE_PROP_MIG_CAP(name, x) \ -- DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) -+ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) - - static Property migration_properties[] = { - DEFINE_PROP_BOOL("store-global-state", MigrationState, -@@ -4646,7 +4644,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp) - } - - for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (ms->enabled_capabilities[i]) { -+ if (ms->capabilities[i]) { - QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); - } - } -diff --git a/migration/migration.h b/migration/migration.h -index 310ae8901b..04e0860b4e 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -310,7 +310,7 @@ struct MigrationState { - int64_t downtime_start; - int64_t downtime; - int64_t expected_downtime; -- bool enabled_capabilities[MIGRATION_CAPABILITY__MAX]; -+ bool capabilities[MIGRATION_CAPABILITY__MAX]; - int64_t setup_time; - /* - * Whether guest was running when we enter the completion stage. 
-diff --git a/migration/rdma.c b/migration/rdma.c -index df646be35e..f35f021963 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -4179,7 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma, -- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); - - if (ret) { - goto err; -@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma_return_path, -- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); - - if (ret) { - goto return_path_err; -diff --git a/migration/savevm.c b/migration/savevm.c -index aa54a67fda..589ef926ab 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -253,7 +253,7 @@ static uint32_t get_validatable_capabilities_count(void) - uint32_t result = 0; - int i; - for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (should_validate_capability(i) && s->enabled_capabilities[i]) { -+ if (should_validate_capability(i) && s->capabilities[i]) { - result++; - } - } -@@ -275,7 +275,7 @@ static int configuration_pre_save(void *opaque) - state->capabilities = g_renew(MigrationCapability, state->capabilities, - state->caps_count); - for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (should_validate_capability(i) && s->enabled_capabilities[i]) { -+ if (should_validate_capability(i) && s->capabilities[i]) { - state->capabilities[j++] = i; - } - } -@@ -325,7 +325,7 @@ static bool configuration_validate_capabilities(SaveState *state) - continue; - } - source_state = test_bit(i, source_caps_bm); -- target_state = s->enabled_capabilities[i]; -+ target_state = s->capabilities[i]; - if (source_state != target_state) { - error_report("Capability %s is %s, but received capability is %s", - MigrationCapability_str(i), --- -2.39.1 - diff --git 
a/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch b/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch new file mode 100644 index 0000000..871a80e --- /dev/null +++ b/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch @@ -0,0 +1,90 @@ +From 21cadc8ed200ad9af13b217b441b9f12cd2163ee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 13 Mar 2024 16:30:00 +0100 +Subject: [PATCH 1/4] mirror: Don't call job_pause_point() under graph lock + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/3] 093910fcd5ea1516d0ed48a9cd73dad6170590f3 (kmwolf/centos-qemu-kvm) + +Calling job_pause_point() while holding the graph reader lock +potentially results in a deadlock: bdrv_graph_wrlock() first drains +everything, including the mirror job, which pauses it. The job is only +unpaused at the end of the drain section, which is when the graph writer +lock has been successfully taken. However, if the job happens to be +paused at a pause point where it still holds the reader lock, the writer +lock can't be taken as long as the job is still paused. + +Mark job_pause_point() as GRAPH_UNLOCKED and fix mirror accordingly. 
+ +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-28125 +Fixes: 004915a96a7a ("block: Protect bs->backing with graph_lock") +Signed-off-by: Kevin Wolf +Message-ID: <20240313153000.33121-1-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit ae5a40e8581185654a667fbbf7e4adbc2a2a3e45) +Signed-off-by: Kevin Wolf +--- + block/mirror.c | 10 ++++++---- + include/qemu/job.h | 2 +- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 5145eb53e1..1bdce3b657 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -479,9 +479,9 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, + return bytes_handled; + } + +-static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s) ++static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) + { +- BlockDriverState *source = s->mirror_top_bs->backing->bs; ++ BlockDriverState *source; + MirrorOp *pseudo_op; + int64_t offset; + /* At least the first dirty chunk is mirrored in one iteration. 
*/ +@@ -489,6 +489,10 @@ static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s) + bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); + int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); + ++ bdrv_graph_co_rdlock(); ++ source = s->mirror_top_bs->backing->bs; ++ bdrv_graph_co_rdunlock(); ++ + bdrv_dirty_bitmap_lock(s->dirty_bitmap); + offset = bdrv_dirty_iter_next(s->dbi); + if (offset < 0) { +@@ -1066,9 +1070,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + mirror_wait_for_free_in_flight_slot(s); + continue; + } else if (cnt != 0) { +- bdrv_graph_co_rdlock(); + mirror_iteration(s); +- bdrv_graph_co_rdunlock(); + } + } + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 9ea98b5927..2b873f2576 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -483,7 +483,7 @@ void job_enter(Job *job); + * + * Called with job_mutex *not* held. + */ +-void coroutine_fn job_pause_point(Job *job); ++void coroutine_fn GRAPH_UNLOCKED job_pause_point(Job *job); + + /** + * @job: The job that calls the function. 
+-- +2.39.3 + diff --git a/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch b/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch new file mode 100644 index 0000000..345a2b4 --- /dev/null +++ b/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch @@ -0,0 +1,1630 @@ +From 972e553e605e8916fc47c2d51cdbde940fd7d855 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 18 Jan 2024 09:48:23 -0500 +Subject: [PATCH 13/22] monitor: only run coroutine commands in + qemu_aio_context + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [9/17] ec5690fcade04a88bd1815bf2ae0377e80fe3d51 (stefanha/centos-stream-qemu-kvm) + +monitor_qmp_dispatcher_co() runs in the iohandler AioContext that is not +polled during nested event loops. The coroutine currently reschedules +itself in the main loop's qemu_aio_context AioContext, which is polled +during nested event loops. One known problem is that QMP device-add +calls drain_call_rcu(), which temporarily drops the BQL, leading to all +sorts of havoc like other vCPU threads re-entering device emulation code +while another vCPU thread is waiting in device emulation code with +aio_poll(). + +Paolo Bonzini suggested running non-coroutine QMP handlers in the +iohandler AioContext. This avoids trouble with nested event loops. His +original idea was to move coroutine rescheduling to +monitor_qmp_dispatch(), but I resorted to moving it to qmp_dispatch() +because we don't know if the QMP handler needs to run in coroutine +context in monitor_qmp_dispatch(). monitor_qmp_dispatch() would have +been nicer since it's associated with the monitor implementation and not +as general as qmp_dispatch(), which is also used by qemu-ga. 
+ +A number of qemu-iotests need updated .out files because the order of +QMP events vs QMP responses has changed. + +Solves Issue #1933. + +Cc: qemu-stable@nongnu.org +Fixes: 7bed89958bfbf40df9ca681cefbdca63abdde39d ("device_core: use drain_call_rcu in in qmp_device_add") +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215192 +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2214985 +Buglink: https://issues.redhat.com/browse/RHEL-17369 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240118144823.1497953-4-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Tested-by: Fiona Ebner +Signed-off-by: Kevin Wolf +(cherry picked from commit effd60c878176bcaf97fa7ce2b12d04bb8ead6f7) +Signed-off-by: Stefan Hajnoczi +--- + monitor/qmp.c | 17 ---- + qapi/qmp-dispatch.c | 24 +++++- + tests/qemu-iotests/060.out | 4 +- + tests/qemu-iotests/071.out | 4 +- + tests/qemu-iotests/081.out | 16 ++-- + tests/qemu-iotests/087.out | 12 +-- + tests/qemu-iotests/108.out | 2 +- + tests/qemu-iotests/109 | 4 +- + tests/qemu-iotests/109.out | 78 ++++++++----------- + tests/qemu-iotests/117.out | 2 +- + tests/qemu-iotests/120.out | 2 +- + tests/qemu-iotests/127.out | 2 +- + tests/qemu-iotests/140.out | 2 +- + tests/qemu-iotests/143.out | 2 +- + tests/qemu-iotests/156.out | 2 +- + tests/qemu-iotests/176.out | 16 ++-- + tests/qemu-iotests/182.out | 2 +- + tests/qemu-iotests/183.out | 4 +- + tests/qemu-iotests/184.out | 32 ++++---- + tests/qemu-iotests/185 | 6 +- + tests/qemu-iotests/185.out | 45 +++++++++-- + tests/qemu-iotests/191.out | 16 ++-- + tests/qemu-iotests/195.out | 16 ++-- + tests/qemu-iotests/223.out | 12 +-- + tests/qemu-iotests/227.out | 32 ++++---- + tests/qemu-iotests/247.out | 2 +- + tests/qemu-iotests/273.out | 8 +- + tests/qemu-iotests/308 | 4 +- + tests/qemu-iotests/308.out | 4 +- + tests/qemu-iotests/tests/file-io-error | 5 +- + tests/qemu-iotests/tests/iothreads-resize.out | 2 +- + tests/qemu-iotests/tests/qsd-jobs.out | 4 +- + 32 files changed, 205 insertions(+), 178 
deletions(-) + +diff --git a/monitor/qmp.c b/monitor/qmp.c +index 6eee450fe4..a239945e8d 100644 +--- a/monitor/qmp.c ++++ b/monitor/qmp.c +@@ -321,14 +321,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) + qemu_coroutine_yield(); + } + +- /* +- * Move the coroutine from iohandler_ctx to qemu_aio_context for +- * executing the command handler so that it can make progress if it +- * involves an AIO_WAIT_WHILE(). +- */ +- aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co); +- qemu_coroutine_yield(); +- + /* Process request */ + if (req_obj->req) { + if (trace_event_get_state(TRACE_MONITOR_QMP_CMD_IN_BAND)) { +@@ -355,15 +347,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) + } + + qmp_request_free(req_obj); +- +- /* +- * Yield and reschedule so the main loop stays responsive. +- * +- * Move back to iohandler_ctx so that nested event loops for +- * qemu_aio_context don't start new monitor commands. +- */ +- aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co); +- qemu_coroutine_yield(); + } + qatomic_set(&qmp_dispatcher_co, NULL); + } +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index 555528b6bb..176b549473 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -206,9 +206,31 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + assert(!(oob && qemu_in_coroutine())); + assert(monitor_cur() == NULL); + if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { ++ if (qemu_in_coroutine()) { ++ /* ++ * Move the coroutine from iohandler_ctx to qemu_aio_context for ++ * executing the command handler so that it can make progress if it ++ * involves an AIO_WAIT_WHILE(). 
++ */ ++ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); ++ qemu_coroutine_yield(); ++ } ++ + monitor_set_cur(qemu_coroutine_self(), cur_mon); + cmd->fn(args, &ret, &err); + monitor_set_cur(qemu_coroutine_self(), NULL); ++ ++ if (qemu_in_coroutine()) { ++ /* ++ * Yield and reschedule so the main loop stays responsive. ++ * ++ * Move back to iohandler_ctx so that nested event loops for ++ * qemu_aio_context don't start new monitor commands. ++ */ ++ aio_co_schedule(iohandler_get_aio_context(), ++ qemu_coroutine_self()); ++ qemu_coroutine_yield(); ++ } + } else { + /* + * Actual context doesn't match the one the command needs. +@@ -232,7 +254,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + .errp = &err, + .co = qemu_coroutine_self(), + }; +- aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh, ++ aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh, + &data); + qemu_coroutine_yield(); + } +diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out +index 329977d9b9..a37bf446e9 100644 +--- a/tests/qemu-iotests/060.out ++++ b/tests/qemu-iotests/060.out +@@ -421,8 +421,8 @@ QMP_VERSION + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "none0", "msg": "Preventing invalid write on metadata (overlaps with refcount table)", "offset": 65536, "node-name": "drive", "fatal": true, "size": 65536}} + write failed: Input/output error + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Testing incoming inactive corrupted image === + +@@ -432,8 +432,8 @@ QMP_VERSION + qcow2: Image is corrupt: L2 table offset 0x2a2a2a00 unaligned (L1 index: 0); further non-fatal corruption events will be suppressed + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "", "msg": "L2 table offset 0x2a2a2a00 unaligned (L1 index: 0)", "node-name": "drive", "fatal": false}} + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + corrupt: false + *** done +diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out +index bca0c02f5c..a2923b05c2 100644 +--- a/tests/qemu-iotests/071.out ++++ b/tests/qemu-iotests/071.out +@@ -45,8 +45,8 @@ QMP_VERSION + {"return": {}} + read failed: Input/output error + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Testing blkverify on existing block device === +@@ -84,9 +84,9 @@ wrote 512/512 bytes at offset 0 + {"return": ""} + read failed: Input/output error + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + QEMU_PROG: Failed to flush the L2 table cache: Input/output error + QEMU_PROG: Failed to flush the refcount block cache: Input/output error ++{"return": {}} + + *** done +diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out +index 615c083549..aba85ea564 100644 +--- a/tests/qemu-iotests/081.out ++++ b/tests/qemu-iotests/081.out +@@ -35,8 +35,8 @@ QMP_VERSION + read 10485760/10485760 bytes at offset 0 + 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + == using quorum rewrite corrupted mode == +@@ -67,8 +67,8 @@ QMP_VERSION + read 10485760/10485760 bytes at offset 0 + 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and 
XXX ops/sec) + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + -- checking that the image has been corrected -- + read 10485760/10485760 bytes at offset 0 +@@ -106,8 +106,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + Testing: + QMP_VERSION +@@ -115,8 +115,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "Cannot add a child to a quorum in blkverify mode"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + == dynamically removing a child from a quorum == +@@ -125,31 +125,31 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + Testing: + QMP_VERSION + {"return": {}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "The number of children cannot be lower than the vote threshold 2"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + Testing: + QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "blkverify=on can only be set if there are exactly two files and vote-threshold is 2"}} + {"error": {"class": "GenericError", "desc": "Cannot find device='drive0-quorum' nor node-name='drive0-quorum'"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", 
"data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + Testing: + QMP_VERSION + {"return": {}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "The number of children cannot be lower than the vote threshold 2"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + *** done +diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out +index e1c23a6983..97b6d8036d 100644 +--- a/tests/qemu-iotests/087.out ++++ b/tests/qemu-iotests/087.out +@@ -7,8 +7,8 @@ Testing: + QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "'node-name' must be specified for the root node"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Duplicate ID === +@@ -18,8 +18,8 @@ QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}} + {"error": {"class": "GenericError", "desc": "Duplicate nodes with node-name='test-node'"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === aio=native without O_DIRECT === +@@ -28,8 +28,8 @@ Testing: + QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Encrypted image QCow === +@@ -40,8 +40,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported 
in system emulators"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Encrypted image LUKS === +@@ -52,8 +52,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Missing driver === +@@ -63,7 +63,7 @@ Testing: -S + QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "Parameter 'driver' is missing"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + *** done +diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out +index b5401d788d..b9c876b394 100644 +--- a/tests/qemu-iotests/108.out ++++ b/tests/qemu-iotests/108.out +@@ -173,8 +173,8 @@ OK: Reftable is where we expect it + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} + {"return": {}} + { "execute": "quit" } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + wrote 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 +index e207a555f3..0fb580f9a5 100755 +--- a/tests/qemu-iotests/109 ++++ b/tests/qemu-iotests/109 +@@ -57,13 +57,13 @@ run_qemu() + _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},aio=${AIOMODE},id=src + _send_qemu_cmd $QEMU_HANDLE "{ 'execute': 'qmp_capabilities' }" "return" + +- _send_qemu_cmd $QEMU_HANDLE \ ++ capture_events="$qmp_event" _send_qemu_cmd $QEMU_HANDLE \ + 
"{'execute':'drive-mirror', 'arguments':{ + 'device': 'src', 'target': '$raw_img', $qmp_format + 'mode': 'existing', 'sync': 'full'}}" \ + "return" + +- _send_qemu_cmd $QEMU_HANDLE '' "$qmp_event" ++ capture_events="$qmp_event JOB_STATUS_CHANGE" _wait_event $QEMU_HANDLE "$qmp_event" + if test "$qmp_event" = BLOCK_JOB_ERROR; then + _send_qemu_cmd $QEMU_HANDLE '' '"status": "null"' + fi +diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out +index 965c9a6a0a..3ae8552ff7 100644 +--- a/tests/qemu-iotests/109.out ++++ b/tests/qemu-iotests/109.out +@@ -7,7 +7,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -23,8 +23,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -35,12 +35,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -50,6 +48,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a qcow2 header into raw === +@@ -59,7 +58,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -75,8 +74,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -87,12 +86,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 197120, "offset": 197120, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -102,6 +99,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a qed header into raw === +@@ -111,7 +109,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -127,8 +125,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -139,12 +137,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -154,6 +150,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a vdi header into raw === +@@ -163,7 +160,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -179,8 +176,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -191,12 +188,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -206,6 +201,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a vmdk header into raw === +@@ -215,7 +211,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -231,8 +227,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -243,12 +239,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 65536, "offset": 65536, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -258,6 +252,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a vpc header into raw === +@@ -267,7 +262,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -283,8 +278,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -295,12 +290,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -310,6 +303,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Copying sample image empty.bochs into raw === +@@ -318,7 +312,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -334,8 +328,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -346,12 +340,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -361,6 +353,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Copying sample image iotest-dirtylog-10G-4M.vhdx into raw === +@@ -369,7 +362,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -385,8 +378,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -397,12 +390,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 31457280, "offset": 31457280, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -412,6 +403,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Copying sample image parallels-v1 into raw === +@@ -420,7 +412,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -436,8 +428,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -448,12 +440,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -463,6 +453,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Copying sample image simple-pattern.cloop into raw === +@@ -471,7 +462,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -487,8 +478,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -499,12 +490,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2048, "offset": 2048, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -514,6 +503,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Write legitimate MBR into raw === +@@ -522,7 +512,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -530,12 +520,10 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -545,6 +533,7 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": 
{"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + { 'execute': 'qmp_capabilities' } + {"return": {}} +@@ -554,12 +543,10 @@ Images are identical. + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -569,5 +556,6 @@ Images are identical. 
+ {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + *** done +diff --git a/tests/qemu-iotests/117.out b/tests/qemu-iotests/117.out +index 735ffd25c6..1cea9e0217 100644 +--- a/tests/qemu-iotests/117.out ++++ b/tests/qemu-iotests/117.out +@@ -18,8 +18,8 @@ wrote 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + {"return": ""} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + No errors were found on the image. 
+ read 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +diff --git a/tests/qemu-iotests/120.out b/tests/qemu-iotests/120.out +index 0744c1f136..35d84a5bc5 100644 +--- a/tests/qemu-iotests/120.out ++++ b/tests/qemu-iotests/120.out +@@ -5,8 +5,8 @@ QMP_VERSION + wrote 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + read 65536/65536 bytes at offset 0 +diff --git a/tests/qemu-iotests/127.out b/tests/qemu-iotests/127.out +index 1685c4850a..dd8c4a8aa9 100644 +--- a/tests/qemu-iotests/127.out ++++ b/tests/qemu-iotests/127.out +@@ -28,6 +28,6 @@ wrote 42/42 bytes at offset 0 + { 'execute': 'quit' } + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out +index 312f76d5da..32866440ae 100644 +--- a/tests/qemu-iotests/140.out ++++ b/tests/qemu-iotests/140.out +@@ -19,6 +19,6 @@ read 65536/65536 bytes at offset 0 + qemu-io: can't open device nbd+unix:///drv?socket=SOCK_DIR/nbd: Requested export not available + server reported: export 'drv' not present + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} 
++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out +index 9ec5888e0e..d6afa32abc 100644 +--- a/tests/qemu-iotests/143.out ++++ b/tests/qemu-iotests/143.out +@@ -10,6 +10,6 @@ server reported: export 'no_such_export' not present + qemu-io: can't open device nbd+unix:///aa--aa1?socket=SOCK_DIR/nbd: Requested export not available + server reported: export 'aa--aa...' not present + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/156.out b/tests/qemu-iotests/156.out +index 4a22f0c41a..07e5e83f5d 100644 +--- a/tests/qemu-iotests/156.out ++++ b/tests/qemu-iotests/156.out +@@ -72,8 +72,8 @@ read 65536/65536 bytes at offset 196608 + {"return": ""} + + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + read 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out +index 9d09b60452..45e9153ef3 100644 +--- a/tests/qemu-iotests/176.out ++++ b/tests/qemu-iotests/176.out +@@ -169,8 +169,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + wrote 196608/196608 bytes at offset 2147287040 + 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 131072/131072 bytes at offset 2147352576 +@@ -206,8 +206,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {"sha256": HASH}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": 
false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Test pass bitmap.1 === + +@@ -218,8 +218,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + wrote 196608/196608 bytes at offset 2147287040 + 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 131072/131072 bytes at offset 2147352576 +@@ -256,8 +256,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {"sha256": HASH}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Test pass bitmap.2 === + +@@ -268,8 +268,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + wrote 196608/196608 bytes at offset 2147287040 + 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 131072/131072 bytes at offset 2147352576 +@@ -306,8 +306,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {"sha256": HASH}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Test pass bitmap.3 === + +@@ -318,8 +318,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + wrote 196608/196608 bytes at offset 2147287040 + 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 131072/131072 bytes at offset 2147352576 +@@ -353,6 +353,6 @@ QMP_VERSION + {"return": {}} + 
{"return": {}} + {"return": {"sha256": HASH}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out +index 57f7265458..83fc1a4797 100644 +--- a/tests/qemu-iotests/182.out ++++ b/tests/qemu-iotests/182.out +@@ -53,6 +53,6 @@ Formatting 'TEST_DIR/t.qcow2.overlay', fmt=qcow2 cluster_size=65536 extended_l2= + {'execute': 'qmp_capabilities'} + {"return": {}} + {'execute': 'quit'} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/183.out b/tests/qemu-iotests/183.out +index fd9c2e52a5..51aa41c888 100644 +--- a/tests/qemu-iotests/183.out ++++ b/tests/qemu-iotests/183.out +@@ -53,11 +53,11 @@ wrote 65536/65536 bytes at offset 1048576 + === Shut down and check image === + + {"execute":"quit"} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"return": {}} + {"execute":"quit"} +-{"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + No errors were found on the image. + No errors were found on the image. 
+ wrote 65536/65536 bytes at offset 1048576 +diff --git a/tests/qemu-iotests/184.out b/tests/qemu-iotests/184.out +index 77e5489d65..e8f631f853 100644 +--- a/tests/qemu-iotests/184.out ++++ b/tests/qemu-iotests/184.out +@@ -89,10 +89,6 @@ Testing: + "return": [ + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -104,6 +100,10 @@ Testing: + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + == property changes in ThrottleGroup == +@@ -169,10 +169,6 @@ Testing: + "iops-total-max": 0 + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -184,6 +180,10 @@ Testing: + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + == object creation/set errors == +@@ -211,10 +211,6 @@ Testing: + "desc": "bps/iops/max total values and read/write values cannot be used at the same time" + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -226,6 +222,10 @@ Testing: + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + == don't specify group == +@@ -247,10 +247,6 @@ Testing: + "desc": "Parameter 'throttle-group' is missing" + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -262,6 +258,10 @@ Testing: + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + *** done +diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 +index 2ae0a85bbf..17489fb91c 100755 +--- a/tests/qemu-iotests/185 ++++ b/tests/qemu-iotests/185 +@@ -344,14 +344,14 @@ wait_for_job_and_quit() { + + sleep 1 + ++ # List of expected events ++ capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' ++ + _send_qemu_cmd $h \ + '{"execute": "quit"}' \ + 'return' + +- # List of expected events +- capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' + _wait_event $h 'SHUTDOWN' +- QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN + _wait_event $h 'JOB_STATUS_CHANGE' # standby + 
_wait_event $h 'JOB_STATUS_CHANGE' # ready + _wait_event $h 'JOB_STATUS_CHANGE' # standby +diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out +index 7292c26bae..6af0953c4d 100644 +--- a/tests/qemu-iotests/185.out ++++ b/tests/qemu-iotests/185.out +@@ -40,9 +40,16 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "commit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + + === Start active commit job and exit qemu === + +@@ -56,9 +63,16 @@ Formatting 
'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "commit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + + === Start mirror job and exit qemu === + +@@ -75,9 +89,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": 
TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "mirror"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + + === Start backup job and exit qemu === + +@@ -97,9 +118,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 65536, "speed": 65536, "type": "backup"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + + === Start streaming job and exit qemu === + +@@ -112,9 +140,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + No errors were found on the image. + + === Start mirror to throttled QSD and exit qemu === +diff --git a/tests/qemu-iotests/191.out b/tests/qemu-iotests/191.out +index ea88777374..c3309e4bc6 100644 +--- a/tests/qemu-iotests/191.out ++++ b/tests/qemu-iotests/191.out +@@ -378,10 +378,6 @@ wrote 65536/65536 bytes at offset 1048576 + ] + } + { 'execute': 'quit' } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -393,6 +389,10 @@ wrote 65536/65536 bytes at offset 1048576 + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +@@ -796,10 +796,6 @@ wrote 65536/65536 bytes at offset 1048576 + ] + } + { 'execute': 'quit' } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -811,6 +807,10 @@ wrote 65536/65536 bytes at offset 1048576 + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +diff --git a/tests/qemu-iotests/195.out b/tests/qemu-iotests/195.out +index ec84df5012..91717d302e 100644 +--- a/tests/qemu-iotests/195.out ++++ b/tests/qemu-iotests/195.out +@@ -17,10 +17,6 @@ Testing: -drive 
if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid + "return": { + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -32,6 +28,10 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + image: TEST_DIR/t.IMGFMT.mid + file format: IMGFMT +@@ -55,10 +55,6 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top + "return": { + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -70,6 +66,10 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + image: TEST_DIR/t.IMGFMT + file format: IMGFMT +diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out +index e5e7f42caa..5f5b42e2dc 100644 +--- a/tests/qemu-iotests/223.out ++++ b/tests/qemu-iotests/223.out +@@ -11,8 +11,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Write part of the file under active bitmap === +@@ -145,14 +145,14 @@ read 2097152/2097152 bytes at offset 2097152 + + {"execute":"nbd-server-remove", + "arguments":{"name":"n"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} + {"return": {}} + {"execute":"nbd-server-remove", + "arguments":{"name":"n2"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} + {"return": {}} + {"execute":"nbd-server-remove", + "arguments":{"name":"n2"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} + 
{"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} + {"execute":"nbd-server-stop"} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} +@@ -267,14 +267,14 @@ read 2097152/2097152 bytes at offset 2097152 + + {"execute":"nbd-server-remove", + "arguments":{"name":"n"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} + {"return": {}} + {"execute":"nbd-server-remove", + "arguments":{"name":"n2"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} + {"return": {}} + {"execute":"nbd-server-remove", + "arguments":{"name":"n2"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} + {"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} + {"execute":"nbd-server-stop"} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} +@@ -282,8 +282,8 @@ read 2097152/2097152 bytes at offset 2097152 + {"execute":"nbd-server-stop"} + {"error": {"class": "GenericError", "desc": "NBD server not running"}} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Use qemu-nbd as server === + +diff --git a/tests/qemu-iotests/227.out b/tests/qemu-iotests/227.out +index a947b1a87d..d6a1d4ecb6 100644 +--- a/tests/qemu-iotests/227.out ++++ b/tests/qemu-iotests/227.out +@@ -54,10 +54,6 @@ Testing: -drive driver=null-co,read-zeroes=on,if=virtio + } + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -69,6 +65,10 @@ 
Testing: -drive driver=null-co,read-zeroes=on,if=virtio + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + === blockstats with -drive if=none === +@@ -124,10 +124,6 @@ Testing: -drive driver=null-co,if=none + } + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -139,6 +135,10 @@ Testing: -drive driver=null-co,if=none + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + === blockstats with -blockdev === +@@ -155,10 +155,6 @@ Testing: -blockdev driver=null-co,node-name=null + "return": [ + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -170,6 +166,10 @@ Testing: -blockdev driver=null-co,node-name=null + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + === blockstats with -blockdev and -device === +@@ -226,10 +226,6 @@ Testing: -blockdev driver=null-co,read-zeroes=on,node-name=null -device virtio-b + } + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -241,5 +237,9 @@ Testing: -blockdev driver=null-co,read-zeroes=on,node-name=null -device virtio-b + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + *** done +diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out +index e909e83994..7d252e7fe4 100644 +--- a/tests/qemu-iotests/247.out ++++ b/tests/qemu-iotests/247.out +@@ -17,6 +17,6 @@ QMP_VERSION + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 134217728, "speed": 0, "type": "commit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": 
{"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/273.out b/tests/qemu-iotests/273.out +index 6a74a8138b..71843f02de 100644 +--- a/tests/qemu-iotests/273.out ++++ b/tests/qemu-iotests/273.out +@@ -282,10 +282,6 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + ] + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -297,5 +293,9 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + *** done +diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 +index de12b2b1b9..ea81dc496a 100755 +--- a/tests/qemu-iotests/308 ++++ b/tests/qemu-iotests/308 +@@ -77,6 +77,7 @@ fuse_export_add() + # $1: Export ID + fuse_export_del() + { ++ capture_events="BLOCK_EXPORT_DELETED" \ + _send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'block-export-del', + 'arguments': { +@@ -84,8 +85,7 @@ fuse_export_del() + } }" \ + 'return' + +- _send_qemu_cmd $QEMU_HANDLE \ +- '' \ ++ _wait_event $QEMU_HANDLE \ + 'BLOCK_EXPORT_DELETED' + } + +diff --git a/tests/qemu-iotests/308.out b/tests/qemu-iotests/308.out +index d5767133b1..e5e233691d 100644 +--- a/tests/qemu-iotests/308.out ++++ b/tests/qemu-iotests/308.out +@@ -165,9 +165,9 @@ OK: Post-truncate image size is as expected + + === Tear down === + {'execute': 'quit'} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export-mp"}} ++{"return": {}} + + === Compare copy with original === + Images are identical. 
+@@ -201,9 +201,9 @@ wrote 67108864/67108864 bytes at offset 0 + read 67108864/67108864 bytes at offset 0 + 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + {'execute': 'quit'} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export"}} ++{"return": {}} + read 67108864/67108864 bytes at offset 0 + 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + *** done +diff --git a/tests/qemu-iotests/tests/file-io-error b/tests/qemu-iotests/tests/file-io-error +index 88ee5f670c..fb8db73b31 100755 +--- a/tests/qemu-iotests/tests/file-io-error ++++ b/tests/qemu-iotests/tests/file-io-error +@@ -99,13 +99,12 @@ echo + $QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io + echo + +-_send_qemu_cmd $QEMU_HANDLE \ ++capture_events=BLOCK_EXPORT_DELETED _send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'block-export-del', + 'arguments': {'id': 'exp0'}}" \ + 'return' + +-_send_qemu_cmd $QEMU_HANDLE \ +- '' \ ++_wait_event $QEMU_HANDLE \ + 'BLOCK_EXPORT_DELETED' + + _send_qemu_cmd $QEMU_HANDLE \ +diff --git a/tests/qemu-iotests/tests/iothreads-resize.out b/tests/qemu-iotests/tests/iothreads-resize.out +index 2ca5a9d964..2967ac8f0d 100644 +--- a/tests/qemu-iotests/tests/iothreads-resize.out ++++ b/tests/qemu-iotests/tests/iothreads-resize.out +@@ -3,8 +3,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + QMP_VERSION + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 128 MiB (134217728 bytes) +diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out +index 
c1bc9b8356..aa6b6d1aef 100644 +--- a/tests/qemu-iotests/tests/qsd-jobs.out ++++ b/tests/qemu-iotests/tests/qsd-jobs.out +@@ -7,8 +7,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ + QMP_VERSION + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} ++{"return": {}} + + === Streaming can't get permission on base node === + +@@ -17,6 +17,6 @@ QMP_VERSION + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} + {"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}} ++{"return": {}} + *** done +-- +2.39.3 + diff --git a/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch b/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch deleted file mode 100644 index 0bebd2e..0000000 --- a/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 2a5ea92ca0a5dffad54e4d06a683f683996cea9a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 12:13:14 +0200 -Subject: [PATCH 05/12] multifd: Create property - multifd-flush-after-each-section -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd 
flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: [1/3] 5bf5348e8be5b1d1629b859ce1ddb7aa0d72c0d6 (juan.quintela/c9s-qemu-kvm) - -We used to flush all channels at the end of each RAM section -sent. That is not needed, so preparing to only flush after a full -iteration through all the RAM. - -Default value of the property is false. But we return "true" in -migrate_multifd_flush_after_each_section() until we implement the code -in following patches. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Acked-by: Peter Xu - ---- - -Rename each-iteration to after-each-section -Rename multifd-sync-after-each-section to - multifd-flush-after-each-section -Move to machine-8.0 (peter) - -conflit hw_compat_8_0 and hw_compat_rhel_9_2 - -(cherry picked from commit 77c259a4cb1c9799754b48f570301ebf1de5ded8) ---- - hw/core/machine.c | 2 ++ - migration/migration.h | 12 ++++++++++++ - migration/options.c | 13 +++++++++++++ - migration/options.h | 1 + - 4 files changed, 28 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5abdc8c39b..5ea52317b9 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -54,6 +54,8 @@ const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - - GlobalProperty hw_compat_rhel_9_2[] = { -+ /* hw_compat_rhel_9_2 from hw_compat_8_0 */ -+ { "migration", "multifd-flush-after-each-section", "on"}, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ - { "e1000e", "migrate-timadj", "off" }, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -diff --git a/migration/migration.h b/migration/migration.h -index 7ccf460aa2..04c78c1fd6 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -411,6 +411,18 @@ struct MigrationState { - */ - bool preempt_pre_7_2; - -+ /* -+ * flush every channel after each section sent. 
-+ * -+ * This assures that we can't mix pages from one iteration through -+ * ram pages with pages for the following iteration. We really -+ * only need to do this flush after we have go through all the -+ * dirty pages. For historical reasons, we do that after each -+ * section. This is suboptimal (we flush too many times). -+ * Default value is false. Setting this property has no effect -+ * until the patch that removes this comment. (since 8.1) -+ */ -+ bool multifd_flush_after_each_section; - /* - * This decides the size of guest memory chunk that will be used - * to track dirty bitmap clearing. The size of memory chunk will -diff --git a/migration/options.c b/migration/options.c -index ccd7ef3907..5b0d080ecb 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -88,6 +88,8 @@ Property migration_properties[] = { - send_section_footer, true), - DEFINE_PROP_BOOL("decompress-error-check", MigrationState, - decompress_error_check, true), -+ DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, -+ multifd_flush_after_each_section, true), - DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, - clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), - DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -@@ -344,6 +346,17 @@ bool migrate_zero_copy_send(void) - - /* pseudo capabilities */ - -+bool migrate_multifd_flush_after_each_section(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ /* -+ * Until the patch that remove this comment, we always return that -+ * the property is enabled. -+ */ -+ return true || s->multifd_flush_after_each_section; -+} -+ - bool migrate_postcopy(void) - { - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); -diff --git a/migration/options.h b/migration/options.h -index 0fc7be6869..271f49ae5f 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -60,6 +60,7 @@ bool migrate_zero_copy_send(void); - * check, but they are not a capability. 
- */ - -+bool migrate_multifd_flush_after_each_section(void); - bool migrate_postcopy(void); - bool migrate_tls(void); - --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch b/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch deleted file mode 100644 index abf21e6..0000000 --- a/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch +++ /dev/null @@ -1,58 +0,0 @@ -From af6f2a543c7db6d67d33fd12615a50e57fc3fe66 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 26 Apr 2023 12:20:36 +0200 -Subject: [PATCH 19/21] multifd: Fix the number of channels ready -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 171: multifd: Fix the number of channels ready -RH-Bugzilla: 2196289 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] a5e271ba249d85b27a68d3cff10480ca3a112c5d (LeoBras/centos-qemu-kvm) - -We don't wait in the sem when we are doing a sync_main. Make it wait -there. To make things clearer, we mark the channel ready at the -begining of the thread loop. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit d2026ee117147893f8d80f060cede6d872ecbd7f) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index cce3ad6988..6a59c03dd2 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -635,6 +635,7 @@ int multifd_send_sync_main(QEMUFile *f) - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -+ qemu_sem_wait(&multifd_send_state->channels_ready); - trace_multifd_send_sync_main_wait(p->id); - qemu_sem_wait(&p->sem_sync); - -@@ -668,6 +669,7 @@ static void *multifd_send_thread(void *opaque) - p->num_packets = 1; - - while (true) { -+ qemu_sem_post(&multifd_send_state->channels_ready); - qemu_sem_wait(&p->sem); - - if (qatomic_read(&multifd_send_state->exiting)) { -@@ -736,7 +738,6 @@ static void *multifd_send_thread(void *opaque) - if (flags & MULTIFD_FLAG_SYNC) { - qemu_sem_post(&p->sem_sync); - } -- qemu_sem_post(&multifd_send_state->channels_ready); - } else if (p->quit) { - qemu_mutex_unlock(&p->mutex); - break; --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch b/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch deleted file mode 100644 index 3f76384..0000000 --- a/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch +++ /dev/null @@ -1,166 +0,0 @@ -From e6f770506091eada46c63ac1c8b934b508e3807f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 13:36:11 +0200 -Subject: [PATCH 07/12] multifd: Only flush once each full round of memory -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: 
[3/3] 33f76dfc72a2552a42dc7f0fe3923564185a7bf7 (juan.quintela/c9s-qemu-kvm) - -We need to add a new flag to mean to flush at that point. -Notice that we still flush at the end of setup and at the end of -complete stages. - -Signed-off-by: Juan Quintela -Acked-by: Peter Xu - ---- - -Add missing qemu_fflush(), now it passes all tests always. -In the previous version, the check that changes the default value to -false got lost in some rebase. Get it back. - -(cherry picked from commit 294e5a4034e81b3d8db03b4e0f691386f20d6ed3) ---- - migration/migration.h | 3 +-- - migration/options.c | 8 ++------ - migration/ram.c | 28 +++++++++++++++++++++++++++- - 3 files changed, 30 insertions(+), 9 deletions(-) - -diff --git a/migration/migration.h b/migration/migration.h -index 04c78c1fd6..dfec649af8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -419,8 +419,7 @@ struct MigrationState { - * only need to do this flush after we have go through all the - * dirty pages. For historical reasons, we do that after each - * section. This is suboptimal (we flush too many times). -- * Default value is false. Setting this property has no effect -- * until the patch that removes this comment. (since 8.1) -+ * Default value is false. 
(since 8.1) - */ - bool multifd_flush_after_each_section; - /* -diff --git a/migration/options.c b/migration/options.c -index 5b0d080ecb..e13c7cb8e5 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -89,7 +89,7 @@ Property migration_properties[] = { - DEFINE_PROP_BOOL("decompress-error-check", MigrationState, - decompress_error_check, true), - DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, -- multifd_flush_after_each_section, true), -+ multifd_flush_after_each_section, false), - DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, - clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), - DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -@@ -350,11 +350,7 @@ bool migrate_multifd_flush_after_each_section(void) - { - MigrationState *s = migrate_get_current(); - -- /* -- * Until the patch that remove this comment, we always return that -- * the property is enabled. -- */ -- return true || s->multifd_flush_after_each_section; -+ return s->multifd_flush_after_each_section; - } - - bool migrate_postcopy(void) -diff --git a/migration/ram.c b/migration/ram.c -index 1e2414d681..e9dcda8b9d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -86,6 +86,7 @@ - #define RAM_SAVE_FLAG_XBZRLE 0x40 - /* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */ - #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 -+#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 - /* We can't use any flag that is bigger than 0x200 */ - - int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, -@@ -1581,6 +1582,7 @@ retry: - * associated with the search process. 
- * - * Returns: -+ * <0: An error happened - * PAGE_ALL_CLEAN: no dirty page found, give up - * PAGE_TRY_AGAIN: no dirty page found, retry for next block - * PAGE_DIRTY_FOUND: dirty page found -@@ -1608,6 +1610,15 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) - pss->page = 0; - pss->block = QLIST_NEXT_RCU(pss->block, next); - if (!pss->block) { -+ if (!migrate_multifd_flush_after_each_section()) { -+ QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; -+ int ret = multifd_send_sync_main(f); -+ if (ret < 0) { -+ return ret; -+ } -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ qemu_fflush(f); -+ } - /* - * If memory migration starts over, we will meet a dirtied page - * which may still exists in compression threads's ring, so we -@@ -2600,6 +2611,9 @@ static int ram_find_and_save_block(RAMState *rs) - break; - } else if (res == PAGE_TRY_AGAIN) { - continue; -+ } else if (res < 0) { -+ pages = res; -+ break; - } - } - } -@@ -3286,6 +3300,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - return ret; - } - -+ if (!migrate_multifd_flush_after_each_section()) { -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ } -+ - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -3471,6 +3489,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - return ret; - } - -+ if (!migrate_multifd_flush_after_each_section()) { -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ } - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -4152,7 +4173,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) - } - decompress_data_with_multi_threads(f, page_buffer, len); - break; -- -+ case RAM_SAVE_FLAG_MULTIFD_FLUSH: -+ multifd_recv_sync_main(); -+ break; - case RAM_SAVE_FLAG_EOS: - /* normal exit */ - if (migrate_multifd_flush_after_each_section()) { -@@ -4426,6 +4449,9 @@ static int ram_load_precopy(QEMUFile *f) - break; - } - break; -+ case RAM_SAVE_FLAG_MULTIFD_FLUSH: -+ multifd_recv_sync_main(); -+ break; - case 
RAM_SAVE_FLAG_EOS: - /* normal exit */ - if (migrate_multifd_flush_after_each_section()) { --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch b/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch deleted file mode 100644 index 779841f..0000000 --- a/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch +++ /dev/null @@ -1,78 +0,0 @@ -From c4bfb4900b95e13bef2d86b83c33786c7c4f6289 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 12:21:32 +0200 -Subject: [PATCH 06/12] multifd: Protect multifd_send_sync_main() calls -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: [2/3] a91adf59c6b2f39bf4a308f566b00e39cae6e0ae (juan.quintela/c9s-qemu-kvm) - -We only need to do that on the ram_save_iterate() call on sending and -on destination when we get a RAM_SAVE_FLAG_EOS. - -In setup() and complete() we need to synch in both new and old cases, -so don't add a check there. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Acked-by: Peter Xu - ---- - -Remove the wrappers that we take out on patch 5. 
- -(cherry picked from commit b05292c237030343516d073b1a1e5f49ffc017a8) ---- - migration/ram.c | 16 +++++++++++----- - 1 file changed, 11 insertions(+), 5 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 01356f60a4..1e2414d681 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3394,9 +3394,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - out: - if (ret >= 0 - && migration_is_setup_or_active(migrate_get_current()->state)) { -- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); -- if (ret < 0) { -- return ret; -+ if (migrate_multifd_flush_after_each_section()) { -+ ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); -+ if (ret < 0) { -+ return ret; -+ } - } - - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -@@ -4153,7 +4155,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) - - case RAM_SAVE_FLAG_EOS: - /* normal exit */ -- multifd_recv_sync_main(); -+ if (migrate_multifd_flush_after_each_section()) { -+ multifd_recv_sync_main(); -+ } - break; - default: - error_report("Unknown combination of migration flags: 0x%x" -@@ -4424,7 +4428,9 @@ static int ram_load_precopy(QEMUFile *f) - break; - case RAM_SAVE_FLAG_EOS: - /* normal exit */ -- multifd_recv_sync_main(); -+ if (migrate_multifd_flush_after_each_section()) { -+ multifd_recv_sync_main(); -+ } - break; - default: - if (flags & RAM_SAVE_FLAG_HOOK) { --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch b/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch deleted file mode 100644 index 214b6dd..0000000 --- a/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 639f65d2cd4c6627a1d22c4b418b41400fe40154 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:33 +0200 -Subject: [PATCH 03/21] nbd/server: Fix drained_poll to wake coroutine in right - AioContext - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: 
block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/4] 177092e61360c2feb04377890b32fdeb2d1cfefc (kmwolf/centos-qemu-kvm) - -nbd_drained_poll() generally runs in the main thread, not whatever -iothread the NBD server coroutine is meant to run in, so it can't -directly reenter the coroutines to wake them up. - -The code seems to have the right intention, it specifies the correct -AioContext when it calls qemu_aio_coroutine_enter(). However, this -functions doesn't schedule the coroutine to run in that AioContext, but -it assumes it is already called in the home thread of the AioContext. - -To fix this, add a new thread-safe qio_channel_wake_read() that can be -called in the main thread to wake up the coroutine in its AioContext, -and use this in nbd_drained_poll(). - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-3-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 7c1f51bf38de8cea4ed5030467646c37b46edeb7) -Signed-off-by: Kevin Wolf ---- - include/io/channel.h | 10 ++++++++++ - io/channel.c | 33 +++++++++++++++++++++++++++------ - nbd/server.c | 3 +-- - 3 files changed, 38 insertions(+), 8 deletions(-) - -diff --git a/include/io/channel.h b/include/io/channel.h -index 153fbd2904..2b905423a9 100644 ---- a/include/io/channel.h -+++ b/include/io/channel.h -@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc); - void coroutine_fn qio_channel_yield(QIOChannel *ioc, - GIOCondition condition); - -+/** -+ * qio_channel_wake_read: -+ * @ioc: the channel object -+ * -+ * If qio_channel_yield() is currently waiting for the channel to become -+ * readable, interrupt it and reenter immediately. This function is safe to call -+ * from any thread. 
-+ */ -+void qio_channel_wake_read(QIOChannel *ioc); -+ - /** - * qio_channel_wait: - * @ioc: the channel object -diff --git a/io/channel.c b/io/channel.c -index a8c7f11649..3c9b7beb65 100644 ---- a/io/channel.c -+++ b/io/channel.c -@@ -19,6 +19,7 @@ - */ - - #include "qemu/osdep.h" -+#include "block/aio-wait.h" - #include "io/channel.h" - #include "qapi/error.h" - #include "qemu/main-loop.h" -@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, - static void qio_channel_restart_read(void *opaque) - { - QIOChannel *ioc = opaque; -- Coroutine *co = ioc->read_coroutine; -+ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); -+ -+ if (!co) { -+ return; -+ } - - /* Assert that aio_co_wake() reenters the coroutine directly */ - assert(qemu_get_current_aio_context() == -@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) - static void qio_channel_restart_write(void *opaque) - { - QIOChannel *ioc = opaque; -- Coroutine *co = ioc->write_coroutine; -+ Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); -+ -+ if (!co) { -+ return; -+ } - - /* Assert that aio_co_wake() reenters the coroutine directly */ - assert(qemu_get_current_aio_context() == -@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) - void coroutine_fn qio_channel_yield(QIOChannel *ioc, - GIOCondition condition) - { -+ AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); -+ - assert(qemu_in_coroutine()); -+ assert(in_aio_context_home_thread(ioc_ctx)); -+ - if (condition == G_IO_IN) { - assert(!ioc->read_coroutine); - ioc->read_coroutine = qemu_coroutine_self(); -@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, - } - qio_channel_set_aio_fd_handlers(ioc); - qemu_coroutine_yield(); -+ assert(in_aio_context_home_thread(ioc_ctx)); - - /* Allow interrupting the operation by reentering the coroutine other than - * through the aio_fd_handlers. 
*/ -- if (condition == G_IO_IN && ioc->read_coroutine) { -- ioc->read_coroutine = NULL; -+ if (condition == G_IO_IN) { -+ assert(ioc->read_coroutine == NULL); - qio_channel_set_aio_fd_handlers(ioc); -- } else if (condition == G_IO_OUT && ioc->write_coroutine) { -- ioc->write_coroutine = NULL; -+ } else if (condition == G_IO_OUT) { -+ assert(ioc->write_coroutine == NULL); - qio_channel_set_aio_fd_handlers(ioc); - } - } - -+void qio_channel_wake_read(QIOChannel *ioc) -+{ -+ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); -+ if (co) { -+ aio_co_wake(co); -+ } -+} - - static gboolean qio_channel_wait_complete(QIOChannel *ioc, - GIOCondition condition, -diff --git a/nbd/server.c b/nbd/server.c -index 3d8d0d81df..ea47522e8f 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) - * enter it here so we don't depend on the client to wake it up. - */ - if (client->recv_coroutine != NULL && client->read_yielding) { -- qemu_aio_coroutine_enter(exp->common.ctx, -- client->recv_coroutine); -+ qio_channel_wake_read(client->ioc); - } - - return true; --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch b/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch new file mode 100644 index 0000000..7298992 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch @@ -0,0 +1,95 @@ +From dc4dd11233522d8195782a2196aaae44bd924575 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 14 Mar 2024 17:58:24 +0100 +Subject: [PATCH 2/4] nbd/server: Fix race in draining the export + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/3] 036e1b171986099904dee468c59d77437e3d3d61 (kmwolf/centos-qemu-kvm) + +When draining an NBD export, nbd_drained_begin() first sets +client->quiescing so that 
nbd_client_receive_next_request() won't start +any new request coroutines. Then nbd_drained_poll() tries to makes sure +that we wait for any existing request coroutines by checking that +client->nb_requests has become 0. + +However, there is a small window between creating a new request +coroutine and increasing client->nb_requests. If a coroutine is in this +state, it won't be waited for and drain returns too early. + +In the context of switching to a different AioContext, this means that +blk_aio_attached() will see client->recv_coroutine != NULL and fail its +assertion. + +Fix this by increasing client->nb_requests immediately when starting the +coroutine. Doing this after the checks if we should create a new +coroutine is okay because client->lock is held. + +Cc: qemu-stable@nongnu.org +Fixes: fd6afc501a01 ("nbd/server: Use drained block ops to quiesce the server") +Signed-off-by: Kevin Wolf +Message-ID: <20240314165825.40261-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9c707525cbb1dd1e56876e45c70c0c08f2876d41) +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 941832f178..c3484cc1eb 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -3007,8 +3007,8 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, + /* Owns a reference to the NBDClient passed as opaque. 
*/ + static coroutine_fn void nbd_trip(void *opaque) + { +- NBDClient *client = opaque; +- NBDRequestData *req = NULL; ++ NBDRequestData *req = opaque; ++ NBDClient *client = req->client; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; + Error *local_err = NULL; +@@ -3037,8 +3037,6 @@ static coroutine_fn void nbd_trip(void *opaque) + goto done; + } + +- req = nbd_request_get(client); +- + /* + * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has + * set client->quiescing but by the time we get back nbd_drained_end() may +@@ -3112,9 +3110,7 @@ static coroutine_fn void nbd_trip(void *opaque) + } + + done: +- if (req) { +- nbd_request_put(req); +- } ++ nbd_request_put(req); + + qemu_mutex_unlock(&client->lock); + +@@ -3143,10 +3139,13 @@ disconnect: + */ + static void nbd_client_receive_next_request(NBDClient *client) + { ++ NBDRequestData *req; ++ + if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && + !client->quiescing) { + nbd_client_get(client); +- client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); ++ req = nbd_request_get(client); ++ client->recv_coroutine = qemu_coroutine_create(nbd_trip, req); + aio_co_schedule(client->exp->common.ctx, client->recv_coroutine); + } + } +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch b/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch new file mode 100644 index 0000000..339e234 --- /dev/null +++ b/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch @@ -0,0 +1,53 @@ +From cd7788a857a6099206c4063e3ef69cb9e4aebcbc Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 21 Dec 2023 14:24:50 -0500 +Subject: [PATCH 070/101] nbd/server: avoid per-NBDRequest nbd_client_get/put() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/26] 
5acb090ac4adf4260cd9e9c5605a27012b2a33aa (kmwolf/centos-qemu-kvm) + +nbd_trip() processes a single NBD request from start to finish and holds +an NBDClient reference throughout. NBDRequest does not outlive the scope +of nbd_trip(). Therefore it is unnecessary to ref/unref NBDClient for +each NBDRequest. + +Removing these nbd_client_get()/nbd_client_put() calls will make +thread-safety easier in the commits that follow. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-ID: <20231221192452.1785567-5-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 895cf0a752..0b09ccc8dc 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1557,7 +1557,6 @@ static NBDRequestData *nbd_request_get(NBDClient *client) + client->nb_requests++; + + req = g_new0(NBDRequestData, 1); +- nbd_client_get(client); + req->client = client; + return req; + } +@@ -1578,8 +1577,6 @@ static void nbd_request_put(NBDRequestData *req) + } + + nbd_client_receive_next_request(client); +- +- nbd_client_put(client); + } + + static void blk_aio_attached(AioContext *ctx, void *opaque) +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch b/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch new file mode 100644 index 0000000..e0d763d --- /dev/null +++ b/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch @@ -0,0 +1,373 @@ +From bb0a6afff7f23a3ddb460dc1b2e70c06565f8a3f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 21 Dec 2023 14:24:52 -0500 +Subject: [PATCH 072/101] nbd/server: introduce NBDClient->lock to protect + fields + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/26] 49b64adaaf8b1c30f339d1ecc8ea89fb9db63f1c (kmwolf/centos-qemu-kvm) + 
+NBDClient has a number of fields that are accessed by both the export +AioContext and the main loop thread. When the AioContext lock is removed +these fields will need another form of protection. + +Add NBDClient->lock and protect fields that are accessed by both +threads. Also add assertions where possible and otherwise add doc +comments stating assumptions about which thread and lock holding. + +Note this patch moves the client->recv_coroutine assertion from +nbd_co_receive_request() to nbd_trip() where client->lock is held. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231221192452.1785567-7-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 144 +++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 111 insertions(+), 33 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index e91e2e0903..941832f178 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -125,23 +125,25 @@ struct NBDClient { + int refcount; /* atomic */ + void (*close_fn)(NBDClient *client, bool negotiated); + ++ QemuMutex lock; ++ + NBDExport *exp; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; + QIOChannelSocket *sioc; /* The underlying data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + +- Coroutine *recv_coroutine; ++ Coroutine *recv_coroutine; /* protected by lock */ + + CoMutex send_lock; + Coroutine *send_coroutine; + +- bool read_yielding; +- bool quiescing; ++ bool read_yielding; /* protected by lock */ ++ bool quiescing; /* protected by lock */ + + QTAILQ_ENTRY(NBDClient) next; +- int nb_requests; +- bool closing; ++ int nb_requests; /* protected by lock */ ++ bool closing; /* protected by lock */ + + uint32_t check_align; /* If non-zero, check for aligned client requests */ + +@@ -1415,11 +1417,18 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) + + len = qio_channel_readv(client->ioc, &iov, 1, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { +- 
client->read_yielding = true; ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ client->read_yielding = true; ++ ++ /* Prompt main loop thread to re-run nbd_drained_poll() */ ++ aio_wait_kick(); ++ } + qio_channel_yield(client->ioc, G_IO_IN); +- client->read_yielding = false; +- if (client->quiescing) { +- return -EAGAIN; ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ client->read_yielding = false; ++ if (client->quiescing) { ++ return -EAGAIN; ++ } + } + continue; + } else if (len < 0) { +@@ -1528,6 +1537,7 @@ void nbd_client_put(NBDClient *client) + blk_exp_unref(&client->exp->common); + } + g_free(client->contexts.bitmaps); ++ qemu_mutex_destroy(&client->lock); + g_free(client); + } + } +@@ -1561,11 +1571,13 @@ static void client_close(NBDClient *client, bool negotiated) + { + assert(qemu_in_main_thread()); + +- if (client->closing) { +- return; +- } ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ if (client->closing) { ++ return; ++ } + +- client->closing = true; ++ client->closing = true; ++ } + + /* Force requests to finish. They will drop their own references, + * then we'll close the socket and free the NBDClient. 
+@@ -1579,6 +1591,7 @@ static void client_close(NBDClient *client, bool negotiated) + } + } + ++/* Runs in export AioContext with client->lock held */ + static NBDRequestData *nbd_request_get(NBDClient *client) + { + NBDRequestData *req; +@@ -1591,6 +1604,7 @@ static NBDRequestData *nbd_request_get(NBDClient *client) + return req; + } + ++/* Runs in export AioContext with client->lock held */ + static void nbd_request_put(NBDRequestData *req) + { + NBDClient *client = req->client; +@@ -1614,14 +1628,18 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) + NBDExport *exp = opaque; + NBDClient *client; + ++ assert(qemu_in_main_thread()); ++ + trace_nbd_blk_aio_attached(exp->name, ctx); + + exp->common.ctx = ctx; + + QTAILQ_FOREACH(client, &exp->clients, next) { +- assert(client->nb_requests == 0); +- assert(client->recv_coroutine == NULL); +- assert(client->send_coroutine == NULL); ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ assert(client->nb_requests == 0); ++ assert(client->recv_coroutine == NULL); ++ assert(client->send_coroutine == NULL); ++ } + } + } + +@@ -1629,6 +1647,8 @@ static void blk_aio_detach(void *opaque) + { + NBDExport *exp = opaque; + ++ assert(qemu_in_main_thread()); ++ + trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); + + exp->common.ctx = NULL; +@@ -1639,8 +1659,12 @@ static void nbd_drained_begin(void *opaque) + NBDExport *exp = opaque; + NBDClient *client; + ++ assert(qemu_in_main_thread()); ++ + QTAILQ_FOREACH(client, &exp->clients, next) { +- client->quiescing = true; ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ client->quiescing = true; ++ } + } + } + +@@ -1649,28 +1673,48 @@ static void nbd_drained_end(void *opaque) + NBDExport *exp = opaque; + NBDClient *client; + ++ assert(qemu_in_main_thread()); ++ + QTAILQ_FOREACH(client, &exp->clients, next) { +- client->quiescing = false; +- nbd_client_receive_next_request(client); ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ client->quiescing = false; ++ 
nbd_client_receive_next_request(client); ++ } + } + } + ++/* Runs in export AioContext */ ++static void nbd_wake_read_bh(void *opaque) ++{ ++ NBDClient *client = opaque; ++ qio_channel_wake_read(client->ioc); ++} ++ + static bool nbd_drained_poll(void *opaque) + { + NBDExport *exp = opaque; + NBDClient *client; + ++ assert(qemu_in_main_thread()); ++ + QTAILQ_FOREACH(client, &exp->clients, next) { +- if (client->nb_requests != 0) { +- /* +- * If there's a coroutine waiting for a request on nbd_read_eof() +- * enter it here so we don't depend on the client to wake it up. +- */ +- if (client->recv_coroutine != NULL && client->read_yielding) { +- qio_channel_wake_read(client->ioc); +- } ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ if (client->nb_requests != 0) { ++ /* ++ * If there's a coroutine waiting for a request on nbd_read_eof() ++ * enter it here so we don't depend on the client to wake it up. ++ * ++ * Schedule a BH in the export AioContext to avoid missing the ++ * wake up due to the race between qio_channel_wake_read() and ++ * qio_channel_yield(). 
++ */ ++ if (client->recv_coroutine != NULL && client->read_yielding) { ++ aio_bh_schedule_oneshot(nbd_export_aio_context(client->exp), ++ nbd_wake_read_bh, client); ++ } + +- return true; ++ return true; ++ } + } + } + +@@ -1681,6 +1725,8 @@ static void nbd_eject_notifier(Notifier *n, void *data) + { + NBDExport *exp = container_of(n, NBDExport, eject_notifier); + ++ assert(qemu_in_main_thread()); ++ + blk_exp_request_shutdown(&exp->common); + } + +@@ -2566,7 +2612,6 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, + int ret; + + g_assert(qemu_in_coroutine()); +- assert(client->recv_coroutine == qemu_coroutine_self()); + ret = nbd_receive_request(client, request, errp); + if (ret < 0) { + return ret; +@@ -2975,6 +3020,9 @@ static coroutine_fn void nbd_trip(void *opaque) + */ + + trace_nbd_trip(); ++ ++ qemu_mutex_lock(&client->lock); ++ + if (client->closing) { + goto done; + } +@@ -2990,7 +3038,21 @@ static coroutine_fn void nbd_trip(void *opaque) + } + + req = nbd_request_get(client); +- ret = nbd_co_receive_request(req, &request, &local_err); ++ ++ /* ++ * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has ++ * set client->quiescing but by the time we get back nbd_drained_end() may ++ * have already cleared client->quiescing. In that case we try again ++ * because nothing else will spawn an nbd_trip() coroutine until we set ++ * client->recv_coroutine = NULL further down. 
++ */ ++ do { ++ assert(client->recv_coroutine == qemu_coroutine_self()); ++ qemu_mutex_unlock(&client->lock); ++ ret = nbd_co_receive_request(req, &request, &local_err); ++ qemu_mutex_lock(&client->lock); ++ } while (ret == -EAGAIN && !client->quiescing); ++ + client->recv_coroutine = NULL; + + if (client->closing) { +@@ -3002,15 +3064,16 @@ static coroutine_fn void nbd_trip(void *opaque) + } + + if (ret == -EAGAIN) { +- assert(client->quiescing); + goto done; + } + + nbd_client_receive_next_request(client); ++ + if (ret == -EIO) { + goto disconnect; + } + ++ qemu_mutex_unlock(&client->lock); + qio_channel_set_cork(client->ioc, true); + + if (ret < 0) { +@@ -3030,6 +3093,10 @@ static coroutine_fn void nbd_trip(void *opaque) + g_free(request.contexts->bitmaps); + g_free(request.contexts); + } ++ ++ qio_channel_set_cork(client->ioc, false); ++ qemu_mutex_lock(&client->lock); ++ + if (ret < 0) { + error_prepend(&local_err, "Failed to send reply: "); + goto disconnect; +@@ -3044,11 +3111,13 @@ static coroutine_fn void nbd_trip(void *opaque) + goto disconnect; + } + +- qio_channel_set_cork(client->ioc, false); + done: + if (req) { + nbd_request_put(req); + } ++ ++ qemu_mutex_unlock(&client->lock); ++ + if (!nbd_client_put_nonzero(client)) { + aio_co_reschedule_self(qemu_get_aio_context()); + nbd_client_put(client); +@@ -3059,13 +3128,19 @@ disconnect: + if (local_err) { + error_reportf_err(local_err, "Disconnect client, due to: "); + } ++ + nbd_request_put(req); ++ qemu_mutex_unlock(&client->lock); + + aio_co_reschedule_self(qemu_get_aio_context()); + client_close(client, true); + nbd_client_put(client); + } + ++/* ++ * Runs in export AioContext and main loop thread. Caller must hold ++ * client->lock. 
++ */ + static void nbd_client_receive_next_request(NBDClient *client) + { + if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && +@@ -3091,7 +3166,9 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + return; + } + +- nbd_client_receive_next_request(client); ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ nbd_client_receive_next_request(client); ++ } + } + + /* +@@ -3108,6 +3185,7 @@ void nbd_client_new(QIOChannelSocket *sioc, + Coroutine *co; + + client = g_new0(NBDClient, 1); ++ qemu_mutex_init(&client->lock); + client->refcount = 1; + client->tlscreds = tlscreds; + if (tlscreds) { +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch b/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch new file mode 100644 index 0000000..3ca11a9 --- /dev/null +++ b/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch @@ -0,0 +1,176 @@ +From 8b60d72532b6511b41d82d591fb4f509314ef15f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 21 Dec 2023 14:24:51 -0500 +Subject: [PATCH 071/101] nbd/server: only traverse NBDExport->clients from + main loop thread + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/26] e7794a3a5c363c7508ee505c4ba03d9ef8862ca9 (kmwolf/centos-qemu-kvm) + +The NBD clients list is currently accessed from both the export +AioContext and the main loop thread. When the AioContext lock is removed +there will be nothing protecting the clients list. + +Adding a lock around the clients list is tricky because NBDClient +structs are refcounted and may be freed from the export AioContext or +the main loop thread. nbd_export_request_shutdown() -> client_close() -> +nbd_client_put() is also tricky because the list lock would be held +while indirectly dropping references to NDBClients. 
+ +A simpler approach is to only allow nbd_client_put() and client_close() +calls from the main loop thread. Then the NBD clients list is only +accessed from the main loop thread and no fancy locking is needed. + +nbd_trip() just needs to reschedule itself in the main loop AioContext +before calling nbd_client_put() and client_close(). This costs more CPU +cycles per NBD request so add nbd_client_put_nonzero() to optimize the +common case where more references to NBDClient remain. + +Note that nbd_client_get() can still be called from either thread, so +make NBDClient->refcount atomic. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231221192452.1785567-6-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 61 +++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 51 insertions(+), 10 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 0b09ccc8dc..e91e2e0903 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -122,7 +122,7 @@ struct NBDMetaContexts { + }; + + struct NBDClient { +- int refcount; ++ int refcount; /* atomic */ + void (*close_fn)(NBDClient *client, bool negotiated); + + NBDExport *exp; +@@ -1501,14 +1501,17 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque + + #define MAX_NBD_REQUESTS 16 + ++/* Runs in export AioContext and main loop thread */ + void nbd_client_get(NBDClient *client) + { +- client->refcount++; ++ qatomic_inc(&client->refcount); + } + + void nbd_client_put(NBDClient *client) + { +- if (--client->refcount == 0) { ++ assert(qemu_in_main_thread()); ++ ++ if (qatomic_fetch_dec(&client->refcount) == 1) { + /* The last reference should be dropped by client->close, + * which is called by client_close. + */ +@@ -1529,8 +1532,35 @@ void nbd_client_put(NBDClient *client) + } + } + ++/* ++ * Tries to release the reference to @client, but only if other references ++ * remain. 
This is an optimization for the common case where we want to avoid ++ * the expense of scheduling nbd_client_put() in the main loop thread. ++ * ++ * Returns true upon success or false if the reference was not released because ++ * it is the last reference. ++ */ ++static bool nbd_client_put_nonzero(NBDClient *client) ++{ ++ int old = qatomic_read(&client->refcount); ++ int expected; ++ ++ do { ++ if (old == 1) { ++ return false; ++ } ++ ++ expected = old; ++ old = qatomic_cmpxchg(&client->refcount, expected, expected - 1); ++ } while (old != expected); ++ ++ return true; ++} ++ + static void client_close(NBDClient *client, bool negotiated) + { ++ assert(qemu_in_main_thread()); ++ + if (client->closing) { + return; + } +@@ -2933,15 +2963,20 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, + static coroutine_fn void nbd_trip(void *opaque) + { + NBDClient *client = opaque; +- NBDRequestData *req; ++ NBDRequestData *req = NULL; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; + Error *local_err = NULL; + ++ /* ++ * Note that nbd_client_put() and client_close() must be called from the ++ * main loop thread. Use aio_co_reschedule_self() to switch AioContext ++ * before calling these functions. ++ */ ++ + trace_nbd_trip(); + if (client->closing) { +- nbd_client_put(client); +- return; ++ goto done; + } + + if (client->quiescing) { +@@ -2949,10 +2984,9 @@ static coroutine_fn void nbd_trip(void *opaque) + * We're switching between AIO contexts. Don't attempt to receive a new + * request and kick the main context which may be waiting for us. 
+ */ +- nbd_client_put(client); + client->recv_coroutine = NULL; + aio_wait_kick(); +- return; ++ goto done; + } + + req = nbd_request_get(client); +@@ -3012,8 +3046,13 @@ static coroutine_fn void nbd_trip(void *opaque) + + qio_channel_set_cork(client->ioc, false); + done: +- nbd_request_put(req); +- nbd_client_put(client); ++ if (req) { ++ nbd_request_put(req); ++ } ++ if (!nbd_client_put_nonzero(client)) { ++ aio_co_reschedule_self(qemu_get_aio_context()); ++ nbd_client_put(client); ++ } + return; + + disconnect: +@@ -3021,6 +3060,8 @@ disconnect: + error_reportf_err(local_err, "Disconnect client, due to: "); + } + nbd_request_put(req); ++ ++ aio_co_reschedule_self(qemu_get_aio_context()); + client_close(client, true); + nbd_client_put(client); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch b/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch deleted file mode 100644 index 20b9c04..0000000 --- a/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch +++ /dev/null @@ -1,78 +0,0 @@ -From d6b3f9e4b388b8d621761104ddf075d6087f6d6c Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 9 Jun 2023 09:27:47 +0200 -Subject: [PATCH 09/12] net: socket: move fd type checking to its own function - -RH-Author: Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [2/3] 9726f0ae81ac209b5db33dc7767f652867d8ca0a (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 23455ae341656ca867ee4a171826b9d280d6acb5) ---- - net/socket.c | 28 ++++++++++++++++++++-------- - 1 file changed, 20 insertions(+), 8 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index 24dcaa55bc..6b1f0fec3a 
100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -446,16 +446,32 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, - return s; - } - -+static int net_socket_fd_check(int fd, Error **errp) -+{ -+ int so_type, optlen = sizeof(so_type); -+ -+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, -+ (socklen_t *)&optlen) < 0) { -+ error_setg(errp, "can't get socket option SO_TYPE"); -+ return -1; -+ } -+ if (so_type != SOCK_DGRAM && so_type != SOCK_STREAM) { -+ error_setg(errp, "socket type=%d for fd=%d must be either" -+ " SOCK_DGRAM or SOCK_STREAM", so_type, fd); -+ return -1; -+ } -+ return so_type; -+} -+ - static NetSocketState *net_socket_fd_init(NetClientState *peer, - const char *model, const char *name, - int fd, int is_connected, - const char *mc, Error **errp) - { -- int so_type = -1, optlen=sizeof(so_type); -+ int so_type; - -- if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, -- (socklen_t *)&optlen)< 0) { -- error_setg(errp, "can't get socket option SO_TYPE"); -+ so_type = net_socket_fd_check(fd, errp); -+ if (so_type < 0) { - close(fd); - return NULL; - } -@@ -465,10 +481,6 @@ static NetSocketState *net_socket_fd_init(NetClientState *peer, - mc, errp); - case SOCK_STREAM: - return net_socket_fd_init_stream(peer, model, name, fd, is_connected); -- default: -- error_setg(errp, "socket type=%d for fd=%d must be either" -- " SOCK_DGRAM or SOCK_STREAM", so_type, fd); -- close(fd); - } - return NULL; - } --- -2.39.3 - diff --git a/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch b/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch deleted file mode 100644 index 269da29..0000000 --- a/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch +++ /dev/null @@ -1,60 +0,0 @@ -From a467540e49e76c5961d86e3f47d3f8fcad8cef09 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 9 Jun 2023 09:27:46 +0200 -Subject: [PATCH 08/12] net: socket: prepare to cleanup net_init_socket() - -RH-Author: 
Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [1/3] 3e4f8370586ae1ac2474fef971a239edb31eeb67 (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Use directly net_socket_fd_init_stream() and net_socket_fd_init_dgram() -when the socket type is already known. - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 006c3fa74c3edb978ff46d2851699e9a95609da5) ---- - net/socket.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index ba6e5b0b00..24dcaa55bc 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -587,7 +587,7 @@ static int net_socket_connect_init(NetClientState *peer, - break; - } - } -- s = net_socket_fd_init(peer, model, name, fd, connected, NULL, errp); -+ s = net_socket_fd_init_stream(peer, model, name, fd, connected); - if (!s) { - return -1; - } -@@ -629,7 +629,7 @@ static int net_socket_mcast_init(NetClientState *peer, - return -1; - } - -- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); -+ s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); - if (!s) { - return -1; - } -@@ -683,7 +683,7 @@ static int net_socket_udp_init(NetClientState *peer, - } - qemu_socket_set_nonblock(fd); - -- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); -+ s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); - if (!s) { - return -1; - } --- -2.39.3 - diff --git a/SOURCES/kvm-net-socket-remove-net_init_socket.patch b/SOURCES/kvm-net-socket-remove-net_init_socket.patch deleted file mode 100644 index 98c96f2..0000000 --- a/SOURCES/kvm-net-socket-remove-net_init_socket.patch +++ /dev/null @@ -1,102 +0,0 @@ -From ecb4f97895849c562112b76a30ddc2037e8df79e Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 
9 Jun 2023 09:27:48 +0200 -Subject: [PATCH 10/12] net: socket: remove net_init_socket() - -RH-Author: Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [3/3] e1d7939f5df4a77c2fff62d1ae4899a7a3615ad9 (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Move the file descriptor type checking before doing anything with it. -If it's not usable, don't close it as it could be in use by another -part of QEMU, only fail and report an error. - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit b6aeee02980e193f744f74c48fd900940feb2799) ---- - net/socket.c | 43 +++++++++++++++++-------------------------- - 1 file changed, 17 insertions(+), 26 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index 6b1f0fec3a..8e3702e1f3 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -463,28 +463,6 @@ static int net_socket_fd_check(int fd, Error **errp) - return so_type; - } - --static NetSocketState *net_socket_fd_init(NetClientState *peer, -- const char *model, const char *name, -- int fd, int is_connected, -- const char *mc, Error **errp) --{ -- int so_type; -- -- so_type = net_socket_fd_check(fd, errp); -- if (so_type < 0) { -- close(fd); -- return NULL; -- } -- switch(so_type) { -- case SOCK_DGRAM: -- return net_socket_fd_init_dgram(peer, model, name, fd, is_connected, -- mc, errp); -- case SOCK_STREAM: -- return net_socket_fd_init_stream(peer, model, name, fd, is_connected); -- } -- return NULL; --} -- - static void net_socket_accept(void *opaque) - { - NetSocketState *s = opaque; -@@ -728,21 +706,34 @@ int net_init_socket(const Netdev *netdev, const char *name, - } - - if (sock->fd) { -- int fd, ret; -+ int fd, ret, so_type; - - fd = monitor_fd_param(monitor_cur(), sock->fd, errp); - if (fd == -1) { - return -1; - } 
-+ so_type = net_socket_fd_check(fd, errp); -+ if (so_type < 0) { -+ return -1; -+ } - ret = qemu_socket_try_set_nonblock(fd); - if (ret < 0) { - error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", - name, fd); - return -1; - } -- if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast, -- errp)) { -- return -1; -+ switch (so_type) { -+ case SOCK_DGRAM: -+ if (!net_socket_fd_init_dgram(peer, "socket", name, fd, 1, -+ sock->mcast, errp)) { -+ return -1; -+ } -+ break; -+ case SOCK_STREAM: -+ if (!net_socket_fd_init_stream(peer, "socket", name, fd, 1)) { -+ return -1; -+ } -+ break; - } - return 0; - } --- -2.39.3 - diff --git a/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch b/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch deleted file mode 100644 index 66d68f1..0000000 --- a/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 760a2f284f6d4cd3cd3b1685411bbca21c4ad233 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 1/6] numa: Validate cluster and NUMA node boundary if required -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [1/3] 24580064b9a0076ec4d9a916839d85135ac48cd9 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 - -For some architectures like ARM64, multiple CPUs in one cluster can be -associated with different NUMA nodes, which is irregular configuration -because we shouldn't have this in baremetal environment. The irregular -configuration causes Linux guest to misbehave, as the following warning -messages indicate. 
- - -smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \ - -numa node,nodeid=0,cpus=0-1,memdev=ram0 \ - -numa node,nodeid=1,cpus=2-3,memdev=ram1 \ - -numa node,nodeid=2,cpus=4-5,memdev=ram2 \ - - ------------[ cut here ]------------ - WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910 - Modules linked in: - CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1 - pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) - pc : build_sched_domains+0x284/0x910 - lr : build_sched_domains+0x184/0x910 - sp : ffff80000804bd50 - x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000 - x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840 - x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508 - x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014 - x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e - x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0 - x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041 - x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001 - x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002 - x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001 - Call trace: - build_sched_domains+0x284/0x910 - sched_init_domains+0xac/0xe0 - sched_init_smp+0x48/0xc8 - kernel_init_freeable+0x140/0x1ac - kernel_init+0x28/0x140 - ret_from_fork+0x10/0x20 - -Improve the situation to warn when multiple CPUs in one cluster have -been associated with different NUMA nodes. However, one NUMA node is -allowed to be associated with different clusters. 
- -Signed-off-by: Gavin Shan -Acked-by: Philippe Mathieu-Daudé -Acked-by: Igor Mammedov -Message-Id: <20230509002739.18388-2-gshan@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit a494fdb715832000ee9047a549a35aacfea8175e) -Signed-off-by: Gavin Shan ---- - hw/core/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++ - include/hw/boards.h | 1 + - 2 files changed, 43 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index c28702b690..5abdc8c39b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -1496,6 +1496,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine) - g_string_free(s, true); - } - -+static void validate_cpu_cluster_to_numa_boundary(MachineState *ms) -+{ -+ MachineClass *mc = MACHINE_GET_CLASS(ms); -+ NumaState *state = ms->numa_state; -+ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); -+ const CPUArchId *cpus = possible_cpus->cpus; -+ int i, j; -+ -+ if (state->num_nodes <= 1 || possible_cpus->len <= 1) { -+ return; -+ } -+ -+ /* -+ * The Linux scheduling domain can't be parsed when the multiple CPUs -+ * in one cluster have been associated with different NUMA nodes. However, -+ * it's fine to associate one NUMA node with CPUs in different clusters. -+ */ -+ for (i = 0; i < possible_cpus->len; i++) { -+ for (j = i + 1; j < possible_cpus->len; j++) { -+ if (cpus[i].props.has_socket_id && -+ cpus[i].props.has_cluster_id && -+ cpus[i].props.has_node_id && -+ cpus[j].props.has_socket_id && -+ cpus[j].props.has_cluster_id && -+ cpus[j].props.has_node_id && -+ cpus[i].props.socket_id == cpus[j].props.socket_id && -+ cpus[i].props.cluster_id == cpus[j].props.cluster_id && -+ cpus[i].props.node_id != cpus[j].props.node_id) { -+ warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64 -+ " have been associated with node-%" PRId64 " and node-%" PRId64 -+ " respectively. 
It can cause OSes like Linux to" -+ " misbehave", i, j, cpus[i].props.socket_id, -+ cpus[i].props.cluster_id, cpus[i].props.node_id, -+ cpus[j].props.node_id); -+ } -+ } -+ } -+} -+ - MemoryRegion *machine_consume_memdev(MachineState *machine, - HostMemoryBackend *backend) - { -@@ -1581,6 +1620,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * - numa_complete_configuration(machine); - if (machine->numa_state->num_nodes) { - machine_numa_finish_cpu_init(machine); -+ if (machine_class->cpu_cluster_has_numa_boundary) { -+ validate_cpu_cluster_to_numa_boundary(machine); -+ } - } - } - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 5f08bd7550..3628671228 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -275,6 +275,7 @@ struct MachineClass { - bool nvdimm_supported; - bool numa_mem_supported; - bool auto_enable_numa; -+ bool cpu_cluster_has_numa_boundary; - SMPCompatProps smp_props; - const char *default_ram_id; - --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch b/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch deleted file mode 100644 index 312af68..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 7495a51c586818925470fb247882f5ba0f7b0ffd Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:03 +0200 -Subject: [PATCH 34/37] pc-bios/s390-ccw: Don't use __bss_start with the "larl" - instruction -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/4] 2483a50c0ed37fa29db649ec44220ac83c215698 (thuth/qemu-kvm-cs9) - -start.S currently cannot be compiled with Clang 16 and 
binutils 2.40: - - ld: start.o(.text+0x8): misaligned symbol `__bss_start' (0xc1e5) for - relocation R_390_PC32DBL - -According to the built-in linker script of ld, the symbol __bss_start -can actually point *before* the .bss section and does not need to have -any alignment, so in certain situations (like when using the internal -assembler of Clang), the __bss_start symbol can indeed be unaligned -and thus it is not suitable for being used with the "larl" instruction -that needs an address that is at least aligned to halfwords. -The problem went unnoticed so far since binutils <= 2.39 did not -check the alignment, but starting with binutils 2.40, such unaligned -addresses are now refused. - -Fix it by loading the address indirectly instead. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2216662 -Reported-by: Miroslav Rezanina -Suggested-by: Andreas Krebbel -Message-Id: <20230629104821.194859-8-thuth@redhat.com> -Reviewed-by: Claudio Imbrenda -Signed-off-by: Thomas Huth -(cherry picked from commit 7cd50cbe4ca3e2860b31b06ec92c17c54bd82d48) ---- - pc-bios/s390-ccw/start.S | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index abd6fe6639..22c1c296df 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -19,7 +19,8 @@ _start: - larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ - - /* clear bss */ -- larl %r2,__bss_start -+ larl %r2,bss_start_literal /* __bss_start might be unaligned ... */ -+ lg %r2,0(%r2) /* ... so load it indirectly */ - larl %r3,_end - slgr %r3,%r2 /* get sizeof bss */ - ltgr %r3,%r3 /* bss empty? 
*/ -@@ -45,7 +46,6 @@ done: - memsetxc: - xc 0(1,%r1),0(%r1) - -- - /* - * void disabled_wait(void) - * -@@ -113,6 +113,8 @@ io_new_code: - br %r14 - - .align 8 -+bss_start_literal: -+ .quad __bss_start - disabled_wait_psw: - .quad 0x0002000180000000,0x0000000000000000 - enabled_wait_psw: --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch b/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch deleted file mode 100644 index bd13187..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch +++ /dev/null @@ -1,218 +0,0 @@ -From 24bc8fc932ae1c88cc2e97f0f90786a7be411bb2 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:00 +0200 -Subject: [PATCH 32/37] pc-bios/s390-ccw: Fix indentation in start.S -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/4] cf8fa053602ce1cfac0b6efa67f491688d4f9348 (thuth/qemu-kvm-cs9) - -start.S is currently indented with a mixture of spaces and tabs, which -is quite ugly. QEMU coding style says indentation should be 4 spaces, -and this is also what we are using in the assembler files in the -tests/tcg/s390x/ folder already, so let's adjust start.S accordingly. 
- -Reviewed-by: Cédric Le Goater -Message-Id: <20230627074703.99608-2-thuth@redhat.com> -Reviewed-by: Claudio Imbrenda -Reviewed-by: Eric Farman -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth -(cherry picked from commit f52420fa4fd9f519dc42c20d2616aba4149adc25) ---- - pc-bios/s390-ccw/start.S | 136 +++++++++++++++++++-------------------- - 1 file changed, 68 insertions(+), 68 deletions(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index 6072906df4..d29de09cc6 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -10,37 +10,37 @@ - * directory. - */ - -- .globl _start -+ .globl _start - _start: - -- larl %r15, stack + 0x8000 /* Set up stack */ -+ larl %r15,stack + 0x8000 /* Set up stack */ - -- /* clear bss */ -- larl %r2, __bss_start -- larl %r3, _end -- slgr %r3, %r2 /* get sizeof bss */ -- ltgr %r3,%r3 /* bss empty? */ -- jz done -- aghi %r3,-1 -- srlg %r4,%r3,8 /* how many 256 byte chunks? */ -- ltgr %r4,%r4 -- lgr %r1,%r2 -- jz remainder -+ /* clear bss */ -+ larl %r2,__bss_start -+ larl %r3,_end -+ slgr %r3,%r2 /* get sizeof bss */ -+ ltgr %r3,%r3 /* bss empty? */ -+ jz done -+ aghi %r3,-1 -+ srlg %r4,%r3,8 /* how many 256 byte chunks? */ -+ ltgr %r4,%r4 -+ lgr %r1,%r2 -+ jz remainder - loop: -- xc 0(256,%r1),0(%r1) -- la %r1,256(%r1) -- brctg %r4,loop -+ xc 0(256,%r1),0(%r1) -+ la %r1,256(%r1) -+ brctg %r4,loop - remainder: -- larl %r2,memsetxc -- ex %r3,0(%r2) -+ larl %r2,memsetxc -+ ex %r3,0(%r2) - done: -- /* set up a pgm exception disabled wait psw */ -- larl %r2, disabled_wait_psw -- mvc 0x01d0(16), 0(%r2) -- j main /* And call C */ -+ /* set up a pgm exception disabled wait psw */ -+ larl %r2,disabled_wait_psw -+ mvc 0x01d0(16),0(%r2) -+ j main /* And call C */ - - memsetxc: -- xc 0(1,%r1),0(%r1) -+ xc 0(1,%r1),0(%r1) - - - /* -@@ -48,11 +48,11 @@ memsetxc: - * - * stops the current guest cpu. 
- */ -- .globl disabled_wait -+ .globl disabled_wait - disabled_wait: -- larl %r1,disabled_wait_psw -- lpswe 0(%r1) --1: j 1b -+ larl %r1,disabled_wait_psw -+ lpswe 0(%r1) -+1: j 1b - - - /* -@@ -60,61 +60,61 @@ disabled_wait: - * - * eats one sclp interrupt - */ -- .globl consume_sclp_int -+ .globl consume_sclp_int - consume_sclp_int: -- /* enable service interrupts in cr0 */ -- stctg %c0,%c0,0(%r15) -- oi 6(%r15),0x2 -- lctlg %c0,%c0,0(%r15) -- /* prepare external call handler */ -- larl %r1, external_new_code -- stg %r1, 0x1b8 -- larl %r1, external_new_mask -- mvc 0x1b0(8),0(%r1) -- /* load enabled wait PSW */ -- larl %r1, enabled_wait_psw -- lpswe 0(%r1) -+ /* enable service interrupts in cr0 */ -+ stctg %c0,%c0,0(%r15) -+ oi 6(%r15),0x2 -+ lctlg %c0,%c0,0(%r15) -+ /* prepare external call handler */ -+ larl %r1,external_new_code -+ stg %r1,0x1b8 -+ larl %r1,external_new_mask -+ mvc 0x1b0(8),0(%r1) -+ /* load enabled wait PSW */ -+ larl %r1,enabled_wait_psw -+ lpswe 0(%r1) - - /* - * void consume_io_int(void) - * - * eats one I/O interrupt - */ -- .globl consume_io_int -+ .globl consume_io_int - consume_io_int: -- /* enable I/O interrupts in cr6 */ -- stctg %c6,%c6,0(%r15) -- oi 4(%r15), 0xff -- lctlg %c6,%c6,0(%r15) -- /* prepare i/o call handler */ -- larl %r1, io_new_code -- stg %r1, 0x1f8 -- larl %r1, io_new_mask -- mvc 0x1f0(8),0(%r1) -- /* load enabled wait PSW */ -- larl %r1, enabled_wait_psw -- lpswe 0(%r1) -+ /* enable I/O interrupts in cr6 */ -+ stctg %c6,%c6,0(%r15) -+ oi 4(%r15), 0xff -+ lctlg %c6,%c6,0(%r15) -+ /* prepare i/o call handler */ -+ larl %r1,io_new_code -+ stg %r1,0x1f8 -+ larl %r1,io_new_mask -+ mvc 0x1f0(8),0(%r1) -+ /* load enabled wait PSW */ -+ larl %r1,enabled_wait_psw -+ lpswe 0(%r1) - - external_new_code: -- /* disable service interrupts in cr0 */ -- stctg %c0,%c0,0(%r15) -- ni 6(%r15),0xfd -- lctlg %c0,%c0,0(%r15) -- br %r14 -+ /* disable service interrupts in cr0 */ -+ stctg %c0,%c0,0(%r15) -+ ni 6(%r15),0xfd -+ lctlg 
%c0,%c0,0(%r15) -+ br %r14 - - io_new_code: -- /* disable I/O interrupts in cr6 */ -- stctg %c6,%c6,0(%r15) -- ni 4(%r15), 0x00 -- lctlg %c6,%c6,0(%r15) -- br %r14 -+ /* disable I/O interrupts in cr6 */ -+ stctg %c6,%c6,0(%r15) -+ ni 4(%r15),0x00 -+ lctlg %c6,%c6,0(%r15) -+ br %r14 - -- .align 8 -+ .align 8 - disabled_wait_psw: -- .quad 0x0002000180000000,0x0000000000000000 -+ .quad 0x0002000180000000,0x0000000000000000 - enabled_wait_psw: -- .quad 0x0302000180000000,0x0000000000000000 -+ .quad 0x0302000180000000,0x0000000000000000 - external_new_mask: -- .quad 0x0000000180000000 -+ .quad 0x0000000180000000 - io_new_mask: -- .quad 0x0000000180000000 -+ .quad 0x0000000180000000 --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch b/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch deleted file mode 100644 index 907fe43..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b5b243cbbb897b236c08699529e13457e1e49924 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 22 Jun 2023 15:08:22 +0200 -Subject: [PATCH 31/37] pc-bios/s390-ccw/Makefile: Use -z noexecstack to - silence linker warning -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/4] 04f6f83169f1c5545a0e2772b4babfc6a50bd5bf (thuth/qemu-kvm-cs9) - -Recent versions of ld complain when linking the s390-ccw bios: - - /usr/bin/ld: warning: start.o: missing .note.GNU-stack section implies - executable stack - /usr/bin/ld: NOTE: This behaviour is deprecated and will be removed in - a future version of the linker - -We can silence the warning by telling the linker to mark the stack -as not executable. 
- -Message-Id: <20230622130822.396793-1-thuth@redhat.com> -Acked-by: Christian Borntraeger -Signed-off-by: Thomas Huth -(cherry picked from commit 442ef32ee5b6059a8f247fb2def9d449578d0a89) ---- - pc-bios/s390-ccw/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile -index 10e8f5cb63..2a590af4a9 100644 ---- a/pc-bios/s390-ccw/Makefile -+++ b/pc-bios/s390-ccw/Makefile -@@ -53,7 +53,7 @@ config-cc.mak: Makefile - $(call cc-option,-march=z900,-march=z10)) 3> config-cc.mak - -include config-cc.mak - --LDFLAGS += -Wl,-pie -nostdlib -+LDFLAGS += -Wl,-pie -nostdlib -z noexecstack - - build-all: s390-ccw.img s390-netboot.img - --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch b/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch deleted file mode 100644 index 0c4ce6f..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 2c52aebf90f28121a3e46a9305304406023b9747 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:01 +0200 -Subject: [PATCH 33/37] pc-bios/s390-ccw: Provide space for initial stack frame - in start.S -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/4] c2f69ce5998861fe20b799bf0113def8cf0cd128 (thuth/qemu-kvm-cs9) - -Providing the space of a stack frame is the duty of the caller, -so we should reserve 160 bytes before jumping into the main function. -Otherwise the main() function might write past the stack array. 
- -While we're at it, add a proper STACK_SIZE macro for the stack size -instead of using magic numbers (this is also required for the following -patch). - -Reviewed-by: Christian Borntraeger -Reviewed-by: Cédric Le Goater -Message-Id: <20230627074703.99608-3-thuth@redhat.com> -Reviewed-by: Eric Farman -Reviewed-by: Claudio Imbrenda -Reviewed-by: Marc Hartmayer -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth -(cherry picked from commit 74fe98ee7fb3344dbd085d1fa32c0dc2fc2c831f) ---- - pc-bios/s390-ccw/start.S | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index d29de09cc6..abd6fe6639 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -10,10 +10,13 @@ - * directory. - */ - -+#define STACK_SIZE 0x8000 -+#define STACK_FRAME_SIZE 160 -+ - .globl _start - _start: - -- larl %r15,stack + 0x8000 /* Set up stack */ -+ larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ - - /* clear bss */ - larl %r2,__bss_start --- -2.39.3 - diff --git a/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch b/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch new file mode 100644 index 0000000..f37f65f --- /dev/null +++ b/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch @@ -0,0 +1,115 @@ +From 9ca64f73238d9f1b9f13d8e941ba42771a992afb Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 29 Dec 2023 14:06:05 +0100 +Subject: [PATCH 20/20] pc/q35: set SMBIOS entry point type to 'auto' by + default + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [18/18] c7fc6ac7350bca3ff99e58620710a86218385781 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + Use smbios-entry-point-type='auto' for newer machine types as a workaround + for Windows not detecting 
SMBIOS tables. Which makes QEMU pick SMBIOS tables + based on configuration (with 2.x preferred and fallback to 3.x if the former + isn't compatible with configuration) + + Default compat setting of smbios-entry-point-type after series + for pc/q35 machines: + * 9.0-newer: 'auto' + * 8.1-8.2: '64' + * 8.0-older: '32' + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2008 + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + Tested-by: Fiona Ebner + +Conflicts: hw/i386/pc_piix.c hw/i386/pc_q35.c + due to RHEL machine types + +RHEL only parts: + Fix RHEL 'pc' machine types at SMBIOS 2.X + for the latest RHEL 'q35' machine type use version autoselect + which propagates to RHEL 9.4 q35 machine type while RHEL 9.2 q35 and older + are kept at SMBIOS_ENTRY_POINT_TYPE_32 (see: pc_q35_machine_rhel920_options) + +Signed-off-by: Igor Mammedov +--- + hw/i386/pc.c | 2 +- + hw/i386/pc_piix.c | 7 +++++++ + hw/i386/pc_q35.c | 5 +++++ + 3 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index ae6777fc1a..d6f267b220 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2004,7 +2004,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->nvdimm_supported = true; + mc->smp_props.dies_supported = true; + mc->default_ram_id = "pc.ram"; +- pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO; + + object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", + pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 7344b35cf1..54d1c58bce 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -539,9 +539,14 @@ static void pc_i440fx_machine_options(MachineClass *m) + + static void pc_i440fx_8_2_machine_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ + pc_i440fx_machine_options(m); + m->alias = "pc"; + m->is_default = true; ++ ++ /* For
pc-i440fx-8.2 and 8.1, use SMBIOS 3.X by default */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; + } + + DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2", NULL, +@@ -982,6 +987,8 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->alias = "pc"; + m->is_default = 1; + m->smp_props.prefer_sockets = true; ++ /* there aren't new PC machine types in RHEL9, keep it at SMBIOS 2.X */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 9a22ff5dd6..cd5fb7380e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -377,8 +377,11 @@ static void pc_q35_machine_options(MachineClass *m) + + static void pc_q35_8_2_machine_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_options(m); + m->alias = "q35"; ++ /* For pc-q35-8.2 and 8.1, use SMBIOS 3.X by default */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; + } + + DEFINE_Q35_MACHINE(v8_2, "pc-q35-8.2", NULL, +@@ -712,6 +715,8 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + m->alias = "q35"; + m->max_cpus = 710; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ /* use SMBIOS version autoselect by default for the latest RHEL machine */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO; + } + + static void pc_q35_init_rhel940(MachineState *machine) +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch b/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch new file mode 100644 index 0000000..9d3fd29 --- /dev/null +++ b/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch @@ -0,0 +1,44 @@ +From 03cad16743d9a4d377af66611d030eca9eda326d Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 19 Mar 2024 11:42:04 +0100 +Subject: [PATCH 4/4] pc: smbios: fixup manufacturer/product/version to match +
downstream + +RH-Author: Igor Mammedov +RH-MergeRequest: 232: pc: smbios: fixup manufacturer/product/version to match downstream +RH-Jira: RHEL-21705 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Ani Sinha +RH-Commit: [1/1] 9235f64acb5ff46360282e889d0aedcb13374ac1 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +commit [1] discarded RHEL only change that customizes +SMBIOS values for manufacturer/product/version for pc/q35 +machine types. +Fix it up by reverting back to ("Red Hat", "KVM", mc->desc) +tuple. + +1) +Fixes: 208239eb2 (hw/i386/pc: Defer smbios_set_defaults() to machine_done) +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 58429bb78d..d6a24177e2 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -63,7 +63,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", mc->desc, mc->name, ++ smbios_set_defaults("Red Hat", "KVM", mc->desc, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version); +-- +2.39.3 + diff --git a/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch b/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch deleted file mode 100644 index 1ec1c82..0000000 --- a/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 2732b6c5ef249d3ec9affca66768cc2fc476ff7c Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Thu, 6 Jul 2023 01:55:47 -0300 -Subject: [PATCH 11/12] pcie: Add hotplug detect state register to cmask -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 188: pcie: Add hotplug detect state register to cmask -RH-Bugzilla: 2215819 -RH-Acked-by: Peter Xu -RH-Acked-by: quintela1 -RH-Commit: 
[1/1] a125fa337711bddbc957c399044393e82272b143 (LeoBras/centos-qemu-kvm) - -When trying to migrate a machine type pc-q35-6.0 or lower, with this -cmdline options, - --device driver=pcie-root-port,port=18,chassis=19,id=pcie-root-port18,bus=pcie.0,addr=0x12 \ --device driver=nec-usb-xhci,p2=4,p3=4,id=nex-usb-xhci0,bus=pcie-root-port18,addr=0x12.0x1 - -the following bug happens after all ram pages were sent: - -qemu-kvm: get_pci_config_device: Bad config data: i=0x6e read: 0 device: 40 cmask: ff wmask: 0 w1cmask:19 -qemu-kvm: Failed to load PCIDevice:config -qemu-kvm: Failed to load pcie-root-port:parent_obj.parent_obj.parent_obj -qemu-kvm: error while loading state for instance 0x0 of device '0000:00:12.0/pcie-root-port' -qemu-kvm: load of migration failed: Invalid argument - -This happens on pc-q35-6.0 or lower because of: -{ "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" } - -In this scenario, hotplug_handler_plug() calls pcie_cap_slot_plug_cb(), -which sets dev->config byte 0x6e with bit PCI_EXP_SLTSTA_PDS to signal PCI -hotplug for the guest. After a while the guest will deal with this hotplug -and qemu will clear the above bit. - -Then, during migration, get_pci_config_device() will compare the -configs of both the freshly created device and the one that is being -received via migration, which will differ due to the PCI_EXP_SLTSTA_PDS bit -and cause the bug to reproduce. - -To avoid this fake incompatibility, there are tree fields in PCIDevice that -can help: - -- wmask: Used to implement R/W bytes, and -- w1cmask: Used to implement RW1C(Write 1 to Clear) bytes -- cmask: Used to enable config checks on load. - -According to PCI Express® Base Specification Revision 5.0 Version 1.0, -table 7-27 (Slot Status Register) bit 6, the "Presence Detect State" is -listed as RO (read-only), so it only makes sense to make use of the cmask -field. 
- -So, clear PCI_EXP_SLTSTA_PDS bit on cmask, so the fake incompatibility on -get_pci_config_device() does not abort the migration. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215819 -Signed-off-by: Leonardo Bras -Message-Id: <20230706045546.593605-3-leobras@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Juan Quintela -(cherry picked from commit 625b370c45f4acd155ee625d61c0057d770a5b5e) -Signed-off-by: Leonardo Bras ---- - hw/pci/pcie.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index b8c24cf45f..8bc4a4ee57 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -659,6 +659,10 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) - pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA, - PCI_EXP_HP_EV_SUPPORTED); - -+ /* Avoid migration abortion when this device hot-removed by guest */ -+ pci_word_test_and_clear_mask(dev->cmask + pos + PCI_EXP_SLTSTA, -+ PCI_EXP_SLTSTA_PDS); -+ - dev->exp.hpev_notified = false; - - qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))), --- -2.39.3 - diff --git a/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch b/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch deleted file mode 100644 index 0421e33..0000000 --- a/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ab9b8620c62540f3267d005c198920671ef9abc3 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 3 Mar 2023 11:15:28 +0100 -Subject: [PATCH 06/56] postcopy-ram: do not use qatomic_mb_read -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [5/50] 534c0e13362dfc994fa90c79bfb5ed6ee8c27dfc (peterx/qemu-kvm) - -It 
does not even pair with a qatomic_mb_set(), so it is clearer to use -load-acquire in this case; they are synonyms. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4592eaf38755a28300d113cd128f65b5b38495f2) -Signed-off-by: Peter Xu ---- - migration/postcopy-ram.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index bbb8af61ae..d7b48dd920 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -1526,7 +1526,7 @@ static PostcopyState incoming_postcopy_state; - - PostcopyState postcopy_state_get(void) - { -- return qatomic_mb_read(&incoming_postcopy_state); -+ return qatomic_load_acquire(&incoming_postcopy_state); - } - - /* Set the state and return the old state */ --- -2.39.1 - diff --git a/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch b/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch deleted file mode 100644 index abaadf8..0000000 --- a/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 99f27e14856c528f442b628e8f4a7881e6e63179 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 30 May 2023 09:19:41 +0200 -Subject: [PATCH 4/5] qapi: add '@fdset' feature for - BlockdevOptionsVirtioBlkVhostVdpa - -RH-Author: Stefano Garzarella -RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver -RH-Bugzilla: 2180076 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/2] abee2a542e41f9eaa17dd204b74778e232d1eb60 (sgarzarella/qemu-kvm-c-9-s) - -The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports the fd -passing through the new 'fd' property. - -Since now we are using qemu_open() on '@path' if the virtio-blk driver -supports the fd passing, let's announce it. -In this way, the management layer can pass the file descriptor of an -already opened vhost-vdpa character device. 
This is useful especially -when the device can only be accessed with certain privileges. - -Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver -in libblkio supports it. - -Suggested-by: Markus Armbruster -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230530071941.8954-3-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 98b126f5e3228a346c774e569e26689943b401dd) -- changed doc indentantion since QAPI parser failed downstream because - we don't have commit 08349786c84306863a3b659c8a9b28bb74c405c6 - downstream. It relaxed the indentation rules. -Signed-off-by: Stefano Garzarella ---- - meson.build | 4 ++++ - qapi/block-core.json | 6 ++++++ - 2 files changed, 10 insertions(+) - -diff --git a/meson.build b/meson.build -index d964e741e7..a18cc64531 100644 ---- a/meson.build -+++ b/meson.build -@@ -1843,6 +1843,10 @@ config_host_data.set('CONFIG_LZO', lzo.found()) - config_host_data.set('CONFIG_MPATH', mpathpersist.found()) - config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) - config_host_data.set('CONFIG_BLKIO', blkio.found()) -+if blkio.found() -+ config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', -+ blkio.version().version_compare('>=1.3.0')) -+endif - config_host_data.set('CONFIG_CURL', curl.found()) - config_host_data.set('CONFIG_CURSES', curses.found()) - config_host_data.set('CONFIG_GBM', gbm.found()) -diff --git a/qapi/block-core.json b/qapi/block-core.json -index c05ad0c07e..81b48a8d3b 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -3841,10 +3841,16 @@ - # - # @path: path to the vhost-vdpa character device. 
- # -+# Features: -+# @fdset: Member @path supports the special "/dev/fdset/N" path -+# (since 8.1) -+# - # Since: 7.2 - ## - { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', - 'data': { 'path': 'str' }, -+ 'features': [ { 'name' :'fdset', -+ 'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ], - 'if': 'CONFIG_BLKIO' } - - ## --- -2.39.3 - diff --git a/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch b/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch deleted file mode 100644 index a95895b..0000000 --- a/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch +++ /dev/null @@ -1,50 +0,0 @@ -From cbf9c74ef46d71c015b9de53f4514941dca8a035 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:19:37 -0400 -Subject: [PATCH 10/14] qapi, i386/sev: Change the reduced-phys-bits value from - 5 to 1 - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/4] 4243578db33f89461e60b745eb96fee402218c9f (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 798a818f50a9bfc01e8b5943090de458863b897b -Author: Tom Lendacky -Date: Fri Sep 30 10:14:27 2022 -0500 - - qapi, i386/sev: Change the reduced-phys-bits value from 5 to 1 - - A guest only ever experiences, at most, 1 bit of reduced physical - addressing. Change the query-sev-capabilities json comment to use 1. - - Fixes: 31dd67f684 ("sev/i386: qmp: add query-sev-capabilities command") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. 
David Alan Gilbert - Message-Id: - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - qapi/misc-target.json | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/qapi/misc-target.json b/qapi/misc-target.json -index de91054523..bf04042f45 100644 ---- a/qapi/misc-target.json -+++ b/qapi/misc-target.json -@@ -172,7 +172,7 @@ - # -> { "execute": "query-sev-capabilities" } - # <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE", - # "cpu0-id": "2lvmGwo+...61iEinw==", --# "cbitpos": 47, "reduced-phys-bits": 5}} -+# "cbitpos": 47, "reduced-phys-bits": 1}} - # - ## - { 'command': 'query-sev-capabilities', 'returns': 'SevCapability', --- -2.39.3 - diff --git a/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch b/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch new file mode 100644 index 0000000..b31142e --- /dev/null +++ b/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch @@ -0,0 +1,167 @@ +From f1e82fe5076b4030d385dfa49b8284899386114d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 20 Dec 2023 08:47:54 -0500 +Subject: [PATCH 08/22] qdev: add IOThreadVirtQueueMappingList property type + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [4/17] 817aa1339da8ed3814730473342ba045e66d5b51 (stefanha/centos-stream-qemu-kvm) + +virtio-blk and virtio-scsi devices will need a way to specify the +mapping between IOThreads and virtqueues. At the moment all virtqueues +are assigned to a single IOThread or the main loop. This single thread +can be a CPU bottleneck, so it is necessary to allow finer-grained +assignment to spread the load. + +Introduce DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST() so devices can take a +parameter that maps virtqueues to IOThreads. 
The command-line syntax for +this new property is as follows: + + --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0","vqs":[0,1,2]},...]}' + +IOThreads are specified by name and virtqueues are specified by 0-based +index. + +It will be common to simply assign virtqueues round-robin across a set +of IOThreads. A convenient syntax that does not require specifying +individual virtqueue indices is available: + + --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0"},{"iothread":"iothread1"},...]}' + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231220134755.814917-4-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit cf03a152c5d749fd0083bfe540df9524f1d2ff1d) +Signed-off-by: Stefan Hajnoczi +--- + hw/core/qdev-properties-system.c | 46 +++++++++++++++++++++++++++++ + include/hw/qdev-properties-system.h | 5 ++++ + qapi/virtio.json | 29 ++++++++++++++++++ + 3 files changed, 80 insertions(+) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 73cced4626..1a396521d5 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -18,6 +18,7 @@ + #include "qapi/qapi-types-block.h" + #include "qapi/qapi-types-machine.h" + #include "qapi/qapi-types-migration.h" ++#include "qapi/qapi-visit-virtio.h" + #include "qapi/qmp/qerror.h" + #include "qemu/ctype.h" + #include "qemu/cutils.h" +@@ -1160,3 +1161,48 @@ const PropertyInfo qdev_prop_cpus390entitlement = { + .set = qdev_propinfo_set_enum, + .set_default_value = qdev_propinfo_set_default_value_enum, + }; ++ ++/* --- IOThreadVirtQueueMappingList --- */ ++ ++static void get_iothread_vq_mapping_list(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ IOThreadVirtQueueMappingList **prop_ptr = ++ object_field_prop_ptr(obj, opaque); ++ ++ visit_type_IOThreadVirtQueueMappingList(v, name, prop_ptr, errp); ++} ++ ++static void 
set_iothread_vq_mapping_list(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ IOThreadVirtQueueMappingList **prop_ptr = ++ object_field_prop_ptr(obj, opaque); ++ IOThreadVirtQueueMappingList *list; ++ ++ if (!visit_type_IOThreadVirtQueueMappingList(v, name, &list, errp)) { ++ return; ++ } ++ ++ qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); ++ *prop_ptr = list; ++} ++ ++static void release_iothread_vq_mapping_list(Object *obj, ++ const char *name, void *opaque) ++{ ++ IOThreadVirtQueueMappingList **prop_ptr = ++ object_field_prop_ptr(obj, opaque); ++ ++ qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); ++ *prop_ptr = NULL; ++} ++ ++const PropertyInfo qdev_prop_iothread_vq_mapping_list = { ++ .name = "IOThreadVirtQueueMappingList", ++ .description = "IOThread virtqueue mapping list [{\"iothread\":\"<id>\", " ++ "\"vqs\":[1,2,3,...]},...]", ++ .get = get_iothread_vq_mapping_list, ++ .set = set_iothread_vq_mapping_list, ++ .release = release_iothread_vq_mapping_list, ++}; +diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h +index 91f7a2452d..06c359c190 100644 +--- a/include/hw/qdev-properties-system.h ++++ b/include/hw/qdev-properties-system.h +@@ -24,6 +24,7 @@ extern const PropertyInfo qdev_prop_off_auto_pcibar; + extern const PropertyInfo qdev_prop_pcie_link_speed; + extern const PropertyInfo qdev_prop_pcie_link_width; + extern const PropertyInfo qdev_prop_cpus390entitlement; ++extern const PropertyInfo qdev_prop_iothread_vq_mapping_list; + + #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t) +@@ -82,4 +83,8 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_cpus390entitlement, \ + CpuS390Entitlement) + ++#define DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST(_name, _state, _field) \ ++ DEFINE_PROP(_name, _state, _field, qdev_prop_iothread_vq_mapping_list, \ ++ IOThreadVirtQueueMappingList
*) ++ + #endif +diff --git a/qapi/virtio.json b/qapi/virtio.json +index e6dcee7b83..19c7c36e36 100644 +--- a/qapi/virtio.json ++++ b/qapi/virtio.json +@@ -928,3 +928,32 @@ + 'data': { 'path': 'str', 'queue': 'uint16', '*index': 'uint16' }, + 'returns': 'VirtioQueueElement', + 'features': [ 'unstable' ] } ++ ++## ++# @IOThreadVirtQueueMapping: ++# ++# Describes the subset of virtqueues assigned to an IOThread. ++# ++# @iothread: the id of IOThread object ++# ++# @vqs: an optional array of virtqueue indices that will be handled by this ++# IOThread. When absent, virtqueues are assigned round-robin across all ++# IOThreadVirtQueueMappings provided. Either all IOThreadVirtQueueMappings ++# must have @vqs or none of them must have it. ++# ++# Since: 9.0 ++## ++ ++{ 'struct': 'IOThreadVirtQueueMapping', ++ 'data': { 'iothread': 'str', '*vqs': ['uint16'] } } ++ ++## ++# @DummyVirtioForceArrays: ++# ++# Not used by QMP; hack to let us use IOThreadVirtQueueMappingList internally ++# ++# Since: 9.0 ++## ++ ++{ 'struct': 'DummyVirtioForceArrays', ++ 'data': { 'unused-iothread-vq-mapping': ['IOThreadVirtQueueMapping'] } } +-- +2.39.3 + diff --git a/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch b/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch new file mode 100644 index 0000000..94bb716 --- /dev/null +++ b/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch @@ -0,0 +1,85 @@ +From 4251aab5b2beb68d1800cd4a329361ff6f57c430 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 20 Dec 2023 08:47:52 -0500 +Subject: [PATCH 07/22] qdev-properties: alias all object class properties + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [3/17] bc5d0aafe4645dacf9277904a2b20760d6e676e1 (stefanha/centos-stream-qemu-kvm) + +qdev_alias_all_properties() aliases a DeviceState's qdev 
properties onto +an Object. This is used for VirtioPCIProxy types so that --device +virtio-blk-pci has properties of its embedded --device virtio-blk-device +object. + +Currently this function is implemented using qdev properties. Change the +function to use QOM object class properties instead. This works because +qdev properties create QOM object class properties, but it also catches +any QOM object class-only properties that have no qdev properties. + +This change ensures that properties of devices are shown with --device +foo,\? even if they are QOM object class properties. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231220134755.814917-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 350147a871a545ab56b4a1062c8485635d9ffc24) +Signed-off-by: Stefan Hajnoczi +--- + hw/core/qdev-properties.c | 18 ++++++++++-------- + include/hw/qdev-properties.h | 4 ++-- + 2 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c +index 840006e953..7d6fa726fd 100644 +--- a/hw/core/qdev-properties.c ++++ b/hw/core/qdev-properties.c +@@ -1076,16 +1076,18 @@ void device_class_set_props(DeviceClass *dc, Property *props) + void qdev_alias_all_properties(DeviceState *target, Object *source) + { + ObjectClass *class; +- Property *prop; ++ ObjectPropertyIterator iter; ++ ObjectProperty *prop; + + class = object_get_class(OBJECT(target)); +- do { +- DeviceClass *dc = DEVICE_CLASS(class); + +- for (prop = dc->props_; prop && prop->name; prop++) { +- object_property_add_alias(source, prop->name, +- OBJECT(target), prop->name); ++ object_class_property_iter_init(&iter, class); ++ while ((prop = object_property_iter_next(&iter))) { ++ if (object_property_find(source, prop->name)) { ++ continue; /* skip duplicate properties */ + } +- class = object_class_get_parent(class); +- } while (class != object_class_by_name(TYPE_DEVICE)); ++ ++ object_property_add_alias(source, 
prop->name, ++ OBJECT(target), prop->name); ++ } + } +diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h +index 25743a29a0..09aa04ca1e 100644 +--- a/include/hw/qdev-properties.h ++++ b/include/hw/qdev-properties.h +@@ -230,8 +230,8 @@ void qdev_property_add_static(DeviceState *dev, Property *prop); + * @target: Device which has properties to be aliased + * @source: Object to add alias properties to + * +- * Add alias properties to the @source object for all qdev properties on +- * the @target DeviceState. ++ * Add alias properties to the @source object for all properties on the @target ++ * DeviceState. + * + * This is useful when @target is an internal implementation object + * owned by @source, and you want to expose all the properties of that +-- +2.39.3 + diff --git a/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch b/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch deleted file mode 100644 index 6830692..0000000 --- a/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 50c833fc3c7d8d3a5124cfdb2f2dc06b910c2252 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:21:25 -0400 -Subject: [PATCH 11/14] qemu-options.hx: Update the reduced-phys-bits - documentation - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/4] b0c4a19e9f4185c97ddf71857bc9367cea01ffa8 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 326e3015c4c6f3197157ea0bb00826ae740e2fad -Author: Tom Lendacky -Date: Fri Sep 30 10:14:28 2022 -0500 - - qemu-options.hx: Update the reduced-phys-bits documentation - - A guest only ever experiences, at most, 1 bit of reduced physical - addressing. Update the documentation to reflect this as well as change - the example value on the reduced-phys-bits option. 
- - Fixes: a9b4942f48 ("target/i386: add Secure Encrypted Virtualization (SEV) object") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. David Alan Gilbert - Message-Id: <13a62ced1808546c1d398e2025cf85f4c94ae123.1664550870.git.thomas.lendacky@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - qemu-options.hx | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/qemu-options.hx b/qemu-options.hx -index b18f933703..edf10a5aac 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -5417,7 +5417,7 @@ SRST - physical address space. The ``reduced-phys-bits`` is used to - provide the number of bits we loose in physical address space. - Similar to C-bit, the value is Host family dependent. On EPYC, -- the value should be 5. -+ a guest will lose a maximum of 1 bit, so the value should be 1. - - The ``sev-device`` provides the device file to use for - communicating with the SEV firmware running inside AMD Secure -@@ -5452,7 +5452,7 @@ SRST - - # |qemu_system_x86| \\ - ...... \\ -- -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=5 \\ -+ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \\ - -machine ...,memory-encryption=sev0 \\ - ..... 
- --- -2.39.3 - diff --git a/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch b/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch new file mode 100644 index 0000000..7dc550c --- /dev/null +++ b/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch @@ -0,0 +1,135 @@ +From f2fe6c7a2def488633cbb67e28ac00279d6e8de4 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 27 Feb 2024 11:17:39 +0100 +Subject: [PATCH 1/2] qemu_init: increase NOFILE soft limit on POSIX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cornelia Huck +RH-MergeRequest: 226: qemu_init: increase NOFILE soft limit on POSIX +RH-Jira: RHEL-26049 +RH-Acked-by: Gavin Shan +RH-Acked-by: Ani Sinha +RH-Acked-by: Shaoqin Huang +RH-Commit: [1/1] cee5404aef3f6437d45a1c43bdee73a57a528bee (cohuck/qemu-kvm-c9s) + +Jira: https://issues.redhat.com/browse/RHEL-26049 + +In many configurations, e.g. multiple vNICs with multiple queues or +with many Ceph OSDs, the default soft limit of 1024 is not enough. +QEMU is supposed to work fine with file descriptors >= 1024 and does +not use select() on POSIX. Bump the soft limit to the allowed hard +limit to avoid issues with the aforementioned configurations. + +Of course the limit could be raised from the outside, but the man page +of systemd.exec states about 'LimitNOFILE=': + +> Don't use. +> [...] +> Typically applications should increase their soft limit to the hard +> limit on their own, if they are OK with working with file +> descriptors above 1023, + +If the soft limit is already the same as the hard limit, avoid the +superfluous setrlimit call. This can avoid a warning with a strict +seccomp filter blocking setrlimit if NOFILE was already raised before +executing QEMU. + +Buglink: https://bugzilla.proxmox.com/show_bug.cgi?id=4507 +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Fiona Ebner +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit 03e471c41d8b1b6eb16c9714f387449f52fe5c1d) +Signed-off-by: Cornelia Huck +--- + include/sysemu/os-posix.h | 1 + + include/sysemu/os-win32.h | 5 +++++ + os-posix.c | 22 ++++++++++++++++++++++ + system/vl.c | 2 ++ + 4 files changed, 30 insertions(+) + +diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h +index dff32ae185..b881ac6c6f 100644 +--- a/include/sysemu/os-posix.h ++++ b/include/sysemu/os-posix.h +@@ -51,6 +51,7 @@ bool is_daemonized(void); + void os_daemonize(void); + bool os_set_runas(const char *user_id); + void os_set_chroot(const char *path); ++void os_setup_limits(void); + void os_setup_post(void); + int os_mlock(void); + +diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h +index 1047d260cb..b82a5d3ad9 100644 +--- a/include/sysemu/os-win32.h ++++ b/include/sysemu/os-win32.h +@@ -128,6 +128,11 @@ static inline int os_mlock(void) + return -ENOSYS; + } + ++static inline void os_setup_limits(void) ++{ ++ return; ++} ++ + #define fsync _commit + + #if !defined(lseek) +diff --git a/os-posix.c b/os-posix.c +index 52ef6990ff..a4284e2c07 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -24,6 +24,7 @@ + */ + + #include "qemu/osdep.h" ++#include <sys/resource.h> + #include <sys/wait.h> + #include <pwd.h> + #include <grp.h> +@@ -256,6 +257,27 @@ void os_daemonize(void) + } + } + ++void os_setup_limits(void) ++{ ++ struct rlimit nofile; ++ ++ if (getrlimit(RLIMIT_NOFILE, &nofile) < 0) { ++ warn_report("unable to query NOFILE limit: %s", strerror(errno)); ++ return; ++ } ++ ++ if (nofile.rlim_cur == nofile.rlim_max) { ++ return; ++ } ++ ++ nofile.rlim_cur = nofile.rlim_max; ++ ++ if (setrlimit(RLIMIT_NOFILE, &nofile) < 0) { ++ warn_report("unable to set NOFILE limit: %s", strerror(errno)); ++ return; ++ } ++} ++ + void os_setup_post(void) + { + int fd = 0; +diff --git a/system/vl.c b/system/vl.c +index 93635ffc5b..6443b6e469 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -2783,6 +2783,8 @@ void qemu_init(int argc, char **argv) +
error_init(argv[0]); + qemu_init_exec_dir(argv[0]); + ++ os_setup_limits(); ++ + qemu_init_arch_modules(); + + qemu_init_subsystems(); +-- +2.39.3 + diff --git a/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch b/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch deleted file mode 100644 index 4a4a2cc..0000000 --- a/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 936e21428a04524ccffeb36110d1aa61de9f44e5 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 11/21] raven: disable reentrancy detection for iomem - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/13] 48278583aa1ab08b912f49cd8b3a79d1bb3abf5f (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:12 2023 -0400 - - raven: disable reentrancy detection for iomem - - As the code is designed for re-entrant calls from raven_io_ops to - pci-conf, mark raven_io_ops as reentrancy-safe. - - Signed-off-by: Alexander Bulekov - Message-Id: <20230427211013.2994127-8-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/pci-host/raven.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c -index 072ffe3c5e..9a11ac4b2b 100644 ---- a/hw/pci-host/raven.c -+++ b/hw/pci-host/raven.c -@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) - memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); - address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); - -+ /* -+ * Raven's raven_io_ops use the address-space API to access pci-conf-idx -+ * (which is also owned by the raven device). As such, mark the -+ * pci_io_non_contiguous as re-entrancy safe. 
-+ */ -+ s->pci_io_non_contiguous.disable_reentrancy_guard = true; -+ - /* CPU address space */ - memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, - &s->pci_io); --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch b/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch deleted file mode 100644 index d0eb303..0000000 --- a/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch +++ /dev/null @@ -1,220 +0,0 @@ -From 41987ce0dd79d8734088002cbd34f20704dd017a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 17 Jul 2023 17:36:07 +0200 -Subject: [PATCH 04/12] s390x/ap: Wire up the device request notifier interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 185: Backport s390x fixes from QEMU 8.1 -RH-Jira: RHEL-794 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Commit: [1/1] ab6c912a1b8cdb584adacac16af79352fdfe7355 (clegoate/qemu-kvm-c9s) - -Jira: https://issues.redhat.com/browse/RHEL-794 - -commit 1360b2ad1f673d32a09de5826cd71ecd0510164a -Author: Tony Krowiak -Date: Fri Jun 2 10:11:25 2023 -0400 - - s390x/ap: Wire up the device request notifier interface - - Let's wire up the device request notifier interface to handle device unplug - requests for AP. 
- - Signed-off-by: Tony Krowiak - Link: https://lore.kernel.org/qemu-devel/20230530225544.280031-1-akrowiak@linux.ibm.com/ - Signed-off-by: Cédric Le Goater - -Backport note: - - - linux-headers/linux/vfio.h - updated to v6.5-rc1 level for VFIO_AP_REQ_IRQ_INDEX definition - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/ap.c | 113 +++++++++++++++++++++++++++++++++++++ - linux-headers/linux/vfio.h | 9 +++ - 2 files changed, 122 insertions(+) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index e0dd561e85..6e21d1da5a 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -18,6 +18,8 @@ - #include "hw/vfio/vfio-common.h" - #include "hw/s390x/ap-device.h" - #include "qemu/error-report.h" -+#include "qemu/event_notifier.h" -+#include "qemu/main-loop.h" - #include "qemu/module.h" - #include "qemu/option.h" - #include "qemu/config-file.h" -@@ -33,6 +35,7 @@ - struct VFIOAPDevice { - APDevice apdev; - VFIODevice vdev; -+ EventNotifier req_notifier; - }; - - OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) -@@ -84,10 +87,110 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) - return vfio_get_group(groupid, &address_space_memory, errp); - } - -+static void vfio_ap_req_notifier_handler(void *opaque) -+{ -+ VFIOAPDevice *vapdev = opaque; -+ Error *err = NULL; -+ -+ if (!event_notifier_test_and_clear(&vapdev->req_notifier)) { -+ return; -+ } -+ -+ qdev_unplug(DEVICE(vapdev), &err); -+ -+ if (err) { -+ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); -+ } -+} -+ -+static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, -+ unsigned int irq, Error **errp) -+{ -+ int fd; -+ size_t argsz; -+ IOHandler *fd_read; -+ EventNotifier *notifier; -+ struct vfio_irq_info *irq_info; -+ VFIODevice *vdev = &vapdev->vdev; -+ -+ switch (irq) { -+ case VFIO_AP_REQ_IRQ_INDEX: -+ notifier = &vapdev->req_notifier; -+ fd_read = vfio_ap_req_notifier_handler; -+ break; -+ default: -+ error_setg(errp, "vfio: Unsupported device irq(%d)", irq); -+ return; -+ } 
-+ -+ if (vdev->num_irqs < irq + 1) { -+ error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", -+ irq, vdev->num_irqs); -+ return; -+ } -+ -+ argsz = sizeof(*irq_info); -+ irq_info = g_malloc0(argsz); -+ irq_info->index = irq; -+ irq_info->argsz = argsz; -+ -+ if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, -+ irq_info) < 0 || irq_info->count < 1) { -+ error_setg_errno(errp, errno, "vfio: Error getting irq info"); -+ goto out_free_info; -+ } -+ -+ if (event_notifier_init(notifier, 0)) { -+ error_setg_errno(errp, errno, -+ "vfio: Unable to init event notifier for irq (%d)", -+ irq); -+ goto out_free_info; -+ } -+ -+ fd = event_notifier_get_fd(notifier); -+ qemu_set_fd_handler(fd, fd_read, NULL, vapdev); -+ -+ if (vfio_set_irq_signaling(vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, -+ errp)) { -+ qemu_set_fd_handler(fd, NULL, NULL, vapdev); -+ event_notifier_cleanup(notifier); -+ } -+ -+out_free_info: -+ g_free(irq_info); -+ -+} -+ -+static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, -+ unsigned int irq) -+{ -+ Error *err = NULL; -+ EventNotifier *notifier; -+ -+ switch (irq) { -+ case VFIO_AP_REQ_IRQ_INDEX: -+ notifier = &vapdev->req_notifier; -+ break; -+ default: -+ error_report("vfio: Unsupported device irq(%d)", irq); -+ return; -+ } -+ -+ if (vfio_set_irq_signaling(&vapdev->vdev, irq, 0, -+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { -+ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); -+ } -+ -+ qemu_set_fd_handler(event_notifier_get_fd(notifier), -+ NULL, NULL, vapdev); -+ event_notifier_cleanup(notifier); -+} -+ - static void vfio_ap_realize(DeviceState *dev, Error **errp) - { - int ret; - char *mdevid; -+ Error *err = NULL; - VFIOGroup *vfio_group; - APDevice *apdev = AP_DEVICE(dev); - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); -@@ -116,6 +219,15 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) - goto out_get_dev_err; - } - -+ vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, &err); -+ if (err) { 
-+ /* -+ * Report this error, but do not make it a failing condition. -+ * Lack of this IRQ in the host does not prevent normal operation. -+ */ -+ error_report_err(err); -+ } -+ - return; - - out_get_dev_err: -@@ -129,6 +241,7 @@ static void vfio_ap_unrealize(DeviceState *dev) - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); - VFIOGroup *group = vapdev->vdev.group; - -+ vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); - vfio_ap_put_device(vapdev); - vfio_put_group(group); - } -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index c59692ce0b..ce464957c8 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -642,6 +642,15 @@ enum { - VFIO_CCW_NUM_IRQS - }; - -+/* -+ * The vfio-ap bus driver makes use of the following IRQ index mapping. -+ * Unimplemented IRQ types return a count of zero. -+ */ -+enum { -+ VFIO_AP_REQ_IRQ_INDEX, -+ VFIO_AP_NUM_IRQS -+}; -+ - /** - * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, - * struct vfio_pci_hot_reset_info) --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch b/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch new file mode 100644 index 0000000..1b4a4ab --- /dev/null +++ b/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch @@ -0,0 +1,106 @@ +From 64b0180f5a52668f8ac4c444ba369231dbc4d5b9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 22 Jan 2024 09:25:53 +0100 +Subject: [PATCH 096/101] s390x/pci: avoid double enable/disable of aif +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices +RH-Jira: RHEL-21169 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Commit: [1/3] ebdf8a474ea21486f5ec051683f17bae6d20f675 (clegoate/qemu-kvm-c9s) + +JIRA: https://issues.redhat.com/browse/RHEL-21169 + +commit 
07b2c8e034d80ff92e202405c494d2ff80fcf848 +Author: Matthew Rosato +Date: Thu Jan 18 13:51:49 2024 -0500 + + s390x/pci: avoid double enable/disable of aif + + Use a flag to keep track of whether AIF is currently enabled. This can be + used to avoid enabling/disabling AIF multiple times as well as to determine + whether or not it should be disabled during reset processing. + + Fixes: d0bc7091c2 ("s390x/pci: enable adapter event notification for interpreted devices") + Reported-by: Cédric Le Goater + Reviewed-by: Eric Farman + Signed-off-by: Matthew Rosato + Message-ID: <20240118185151.265329-2-mjrosato@linux.ibm.com> + Reviewed-by: Cédric Le Goater + Signed-off-by: Thomas Huth + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-kvm.c | 25 +++++++++++++++++++++++-- + include/hw/s390x/s390-pci-bus.h | 1 + + 2 files changed, 24 insertions(+), 2 deletions(-) + +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +index ff41e4106d..1ee510436c 100644 +--- a/hw/s390x/s390-pci-kvm.c ++++ b/hw/s390x/s390-pci-kvm.c +@@ -27,6 +27,7 @@ bool s390_pci_kvm_interp_allowed(void) + + int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) + { ++ int rc; + struct kvm_s390_zpci_op args = { + .fh = pbdev->fh, + .op = KVM_S390_ZPCIOP_REG_AEN, +@@ -38,15 +39,35 @@ int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) + .u.reg_aen.flags = (assist) ? 
0 : KVM_S390_ZPCIOP_REGAEN_HOST + }; + +- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++ if (pbdev->aif) { ++ return -EINVAL; ++ } ++ ++ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++ if (rc == 0) { ++ pbdev->aif = true; ++ } ++ ++ return rc; + } + + int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) + { ++ int rc; ++ + struct kvm_s390_zpci_op args = { + .fh = pbdev->fh, + .op = KVM_S390_ZPCIOP_DEREG_AEN + }; + +- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++ if (!pbdev->aif) { ++ return -EINVAL; ++ } ++ ++ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++ if (rc == 0) { ++ pbdev->aif = false; ++ } ++ ++ return rc; + } +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index b1bdbeaeb5..435e788867 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -361,6 +361,7 @@ struct S390PCIBusDevice { + bool unplug_requested; + bool interp; + bool forwarding_assist; ++ bool aif; + QTAILQ_ENTRY(S390PCIBusDevice) link; + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch b/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch new file mode 100644 index 0000000..f3a4129 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch @@ -0,0 +1,137 @@ +From c885b17e09ab19a3e8d3b2e1765963811af6f764 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 22 Jan 2024 09:25:53 +0100 +Subject: [PATCH 098/101] s390x/pci: drive ISM reset from subsystem reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices +RH-Jira: RHEL-21169 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Commit: [3/3] 426cf156a2c67e6dcd7483a769fa3741e2700504 (clegoate/qemu-kvm-c9s) + +JIRA: https://issues.redhat.com/browse/RHEL-21169 + +commit 
68c691ca99a2538d6a53a70ce8a9ce06ee307ff1 +Author: Matthew Rosato +Date: Thu Jan 18 13:51:51 2024 -0500 + + s390x/pci: drive ISM reset from subsystem reset + + ISM devices are sensitive to manipulation of the IOMMU, so the ISM device + needs to be reset before the vfio-pci device is reset (triggering a full + UNMAP). In order to ensure this occurs, trigger ISM device resets from + subsystem_reset before triggering the PCI bus reset (which will also + trigger vfio-pci reset). This only needs to be done for ISM devices + which were enabled for use by the guest. + Further, ensure that AIF is disabled as part of the reset event. + + Fixes: ef1535901a ("s390x: do a subsystem reset before the unprotect on reboot") + Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset") + Reported-by: Cédric Le Goater + Signed-off-by: Matthew Rosato + Message-ID: <20240118185151.265329-4-mjrosato@linux.ibm.com> + Reviewed-by: Eric Farman + Reviewed-by: Cédric Le Goater + Signed-off-by: Thomas Huth + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 26 +++++++++++++++++--------- + hw/s390x/s390-virtio-ccw.c | 8 ++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + 3 files changed, 26 insertions(+), 9 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 347580ebac..3e57d5faca 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -151,20 +151,12 @@ static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) + pci_device_reset(pbdev->pdev); + } + +-static void s390_pci_reset_cb(void *opaque) +-{ +- S390PCIBusDevice *pbdev = opaque; +- +- pci_device_reset(pbdev->pdev); +-} +- + static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) + { + HotplugHandler *hotplug_ctrl; + + if (pbdev->pft == ZPCI_PFT_ISM) { + notifier_remove(&pbdev->shutdown_notifier); +- qemu_unregister_reset(s390_pci_reset_cb, pbdev); + } + + /* Unplug the PCI device */ +@@ -1132,7 +1124,6 @@ static void 
s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (pbdev->pft == ZPCI_PFT_ISM) { + pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; + qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); +- qemu_register_reset(s390_pci_reset_cb, pbdev); + } + } else { + pbdev->fh |= FH_SHM_EMUL; +@@ -1279,6 +1270,23 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1); + } + ++void s390_pci_ism_reset(void) ++{ ++ S390pciState *s = s390_get_phb(); ++ ++ S390PCIBusDevice *pbdev, *next; ++ ++ /* Trigger reset event for each passthrough ISM device currently in-use */ ++ QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) { ++ if (pbdev->interp && pbdev->pft == ZPCI_PFT_ISM && ++ pbdev->fh & FH_MASK_ENABLE) { ++ s390_pci_kvm_aif_disable(pbdev); ++ ++ pci_device_reset(pbdev->pdev); ++ } ++ } ++} ++ + static void s390_pcihost_reset(DeviceState *dev) + { + S390pciState *s = S390_PCI_HOST_BRIDGE(dev); +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index e26ce26f5a..24f4773179 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -118,6 +118,14 @@ static void subsystem_reset(void) + DeviceState *dev; + int i; + ++ /* ++ * ISM firmware is sensitive to unexpected changes to the IOMMU, which can ++ * occur during reset of the vfio-pci device (unmap of entire aperture). ++ * Ensure any passthrough ISM devices are reset now, while CPUs are paused ++ * but before vfio-pci cleanup occurs. 
++ */ ++ s390_pci_ism_reset(); ++ + for (i = 0; i < ARRAY_SIZE(reset_dev_types); i++) { + dev = DEVICE(object_resolve_path_type("", reset_dev_types[i], NULL)); + if (dev) { +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 435e788867..2c43ea123f 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -401,5 +401,6 @@ S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s, + const char *target); + S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s, + S390PCIBusDevice *pbdev); ++void s390_pci_ism_reset(void); + + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch b/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch new file mode 100644 index 0000000..845a467 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch @@ -0,0 +1,71 @@ +From 49078bdfd4c116da3e920632ec6f7041f1b38015 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 22 Jan 2024 09:25:53 +0100 +Subject: [PATCH 097/101] s390x/pci: refresh fh before disabling aif +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices +RH-Jira: RHEL-21169 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Commit: [2/3] 3523067909c41818dfc769abdb93930833416c11 (clegoate/qemu-kvm-c9s) + +JIRA: https://issues.redhat.com/browse/RHEL-21169 + +commit 30e35258e25c75c9d799c34fd89afcafffb37084 +Author: Matthew Rosato +Date: Thu Jan 18 13:51:50 2024 -0500 + + s390x/pci: refresh fh before disabling aif + + Typically we refresh the host fh during CLP enable, however it's possible + that the device goes through multiple reset events before the guest + performs another CLP enable. Let's handle this for now by refreshing the + host handle from vfio before disabling aif. 
+ + Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset") + Reported-by: Cédric Le Goater + Reviewed-by: Eric Farman + Signed-off-by: Matthew Rosato + Message-ID: <20240118185151.265329-3-mjrosato@linux.ibm.com> + Reviewed-by: Cédric Le Goater + Signed-off-by: Thomas Huth + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +index 1ee510436c..9eef4fc3ec 100644 +--- a/hw/s390x/s390-pci-kvm.c ++++ b/hw/s390x/s390-pci-kvm.c +@@ -18,6 +18,7 @@ + #include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-kvm.h" + #include "hw/s390x/s390-pci-inst.h" ++#include "hw/s390x/s390-pci-vfio.h" + #include "cpu_models.h" + + bool s390_pci_kvm_interp_allowed(void) +@@ -64,6 +65,14 @@ int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) + return -EINVAL; + } + ++ /* ++ * The device may have already been reset but we still want to relinquish ++ * the guest ISC, so always be sure to use an up-to-date host fh. 
++ */ ++ if (!s390_pci_get_host_fh(pbdev, &args.fh)) { ++ return -EPERM; ++ } ++ + rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); + if (rc == 0) { + pbdev->aif = false; +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch b/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch deleted file mode 100644 index ecf1353..0000000 --- a/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 3cab2a638a10ece2b76d9f33a3c5dc6f64f1bbaa Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Wed, 10 May 2023 12:55:31 +0200 -Subject: [PATCH 21/21] s390x/pv: Fix spurious warning with asynchronous - teardown -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests -RH-Bugzilla: 2168500 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cédric Le Goater -RH-Commit: [2/2] cb690d3155ea22c6df00a4d75b72f501515e5556 (thuth/qemu-kvm-cs9) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 - -Kernel commit 292a7d6fca33 ("KVM: s390: pv: fix asynchronous teardown -for small VMs") causes the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl to fail -if the VM is not larger than 2GiB. QEMU would attempt it and fail, -print an error message, and then proceed with a normal teardown. - -Avoid attempting to use asynchronous teardown altogether when the VM is -not larger than 2 GiB. This will avoid triggering the error message and -also avoid pointless overhead; normal teardown is fast enough for small -VMs. 
- -Reported-by: Marc Hartmayer -Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot") -Link: https://lore.kernel.org/all/20230421085036.52511-2-imbrenda@linux.ibm.com/ -Signed-off-by: Claudio Imbrenda -Message-Id: <20230510105531.30623-2-imbrenda@linux.ibm.com> -Reviewed-by: Thomas Huth -[thuth: Fix inline function parameter in pv.h] -Signed-off-by: Thomas Huth -(cherry picked from commit 88693ab2a53f2f3d25cb39a7b5034ab391bc5a81) ---- - hw/s390x/pv.c | 10 ++++++++-- - hw/s390x/s390-virtio-ccw.c | 2 +- - include/hw/s390x/pv.h | 6 +++--- - 3 files changed, 12 insertions(+), 6 deletions(-) - -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index 49ea38236c..b63f3784c6 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -13,6 +13,7 @@ - - #include - -+#include "qemu/units.h" - #include "qapi/error.h" - #include "qemu/error-report.h" - #include "sysemu/kvm.h" -@@ -115,7 +116,7 @@ static void *s390_pv_do_unprot_async_fn(void *p) - return NULL; - } - --bool s390_pv_vm_try_disable_async(void) -+bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) - { - /* - * t is only needed to create the thread; once qemu_thread_create -@@ -123,7 +124,12 @@ bool s390_pv_vm_try_disable_async(void) - */ - QemuThread t; - -- if (!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { -+ /* -+ * If the feature is not present or if the VM is not larger than 2 GiB, -+ * KVM_PV_ASYNC_CLEANUP_PREPARE fill fail; no point in attempting it. 
-+ */ -+ if ((MACHINE(ms)->maxram_size <= 2 * GiB) || -+ !kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { - return false; - } - if (s390_pv_cmd(KVM_PV_ASYNC_CLEANUP_PREPARE, NULL) != 0) { -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 6a0b93c63d..d95c595f88 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -330,7 +330,7 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) - - static void s390_machine_unprotect(S390CcwMachineState *ms) - { -- if (!s390_pv_vm_try_disable_async()) { -+ if (!s390_pv_vm_try_disable_async(ms)) { - s390_pv_vm_disable(); - } - ms->pv = false; -diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h -index 966306a9db..7b935e2246 100644 ---- a/include/hw/s390x/pv.h -+++ b/include/hw/s390x/pv.h -@@ -14,10 +14,10 @@ - - #include "qapi/error.h" - #include "sysemu/kvm.h" -+#include "hw/s390x/s390-virtio-ccw.h" - - #ifdef CONFIG_KVM - #include "cpu.h" --#include "hw/s390x/s390-virtio-ccw.h" - - static inline bool s390_is_pv(void) - { -@@ -41,7 +41,7 @@ static inline bool s390_is_pv(void) - int s390_pv_query_info(void); - int s390_pv_vm_enable(void); - void s390_pv_vm_disable(void); --bool s390_pv_vm_try_disable_async(void); -+bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); - int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); - int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); - void s390_pv_prep_reset(void); -@@ -61,7 +61,7 @@ static inline bool s390_is_pv(void) { return false; } - static inline int s390_pv_query_info(void) { return 0; } - static inline int s390_pv_vm_enable(void) { return 0; } - static inline void s390_pv_vm_disable(void) {} --static inline bool s390_pv_vm_try_disable_async(void) { return false; } -+static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } - static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } - static 
inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } - static inline void s390_pv_prep_reset(void) {} --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-Await-request-purging.patch b/SOURCES/kvm-scsi-Await-request-purging.patch new file mode 100644 index 0000000..9bd4399 --- /dev/null +++ b/SOURCES/kvm-scsi-Await-request-purging.patch @@ -0,0 +1,124 @@ +From 94d6458a58239b52394d58b6880509041186d5a8 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 15:47:55 +0100 +Subject: [PATCH 04/22] scsi: Await request purging + +RH-Author: Hanna Czenczek +RH-MergeRequest: 222: Allow concurrent BlockBackend context changes +RH-Jira: RHEL-24593 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [2/2] 35a89273cab0af8f999881e67d359fe1328363a0 (hreitz/qemu-kvm-c-9-s) + +scsi_device_for_each_req_async() currently does not provide any way to +be awaited. One of its callers is scsi_device_purge_requests(), which +therefore currently does not guarantee that all requests are fully +settled when it returns. + +We want all requests to be settled, because scsi_device_purge_requests() +is called through the unrealize path, including the one invoked by +virtio_scsi_hotunplug() through qdev_simple_device_unplug_cb(), which +most likely assumes that all SCSI requests are done then. + +In fact, scsi_device_purge_requests() already contains a blk_drain(), +but this will not fully await scsi_device_for_each_req_async(), only the +I/O requests it potentially cancels (not the non-I/O requests). +However, we can have scsi_device_for_each_req_async() increment the BB +in-flight counter, and have scsi_device_for_each_req_async_bh() +decrement it when it is done. This way, the blk_drain() will fully +await all SCSI requests to be purged. 
+ +This also removes the need for scsi_device_for_each_req_async_bh() to +double-check the current context and potentially re-schedule itself, +should it now differ from the BB's context: Changing a BB's AioContext +with a root node is done through bdrv_try_change_aio_context(), which +creates a drained section. With this patch, we keep the BB in-flight +counter elevated throughout, so we know the BB's context cannot change. + +Signed-off-by: Hanna Czenczek +Message-ID: <20240202144755.671354-3-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 1604c0493193273e4eac547f86fbd2845e7f9af4) +--- + hw/scsi/scsi-bus.c | 30 +++++++++++++++++++++--------- + 1 file changed, 21 insertions(+), 9 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index 5b08cbf60a..b1bf8e6433 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -120,17 +120,13 @@ static void scsi_device_for_each_req_async_bh(void *opaque) + SCSIRequest *next; + + /* +- * If the AioContext changed before this BH was called then reschedule into +- * the new AioContext before accessing ->requests. This can happen when +- * scsi_device_for_each_req_async() is called and then the AioContext is +- * changed before BHs are run. ++ * The BB cannot have changed contexts between this BH being scheduled and ++ * now: BBs' AioContexts, when they have a node attached, can only be ++ * changed via bdrv_try_change_aio_context(), in a drained section. While ++ * we have the in-flight counter incremented, that drain must block. 
+ */ + ctx = blk_get_aio_context(s->conf.blk); +- if (ctx != qemu_get_current_aio_context()) { +- aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, +- g_steal_pointer(&data)); +- return; +- } ++ assert(ctx == qemu_get_current_aio_context()); + + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { + data->fn(req, data->fn_opaque); +@@ -138,11 +134,16 @@ static void scsi_device_for_each_req_async_bh(void *opaque) + + /* Drop the reference taken by scsi_device_for_each_req_async() */ + object_unref(OBJECT(s)); ++ ++ /* Paired with blk_inc_in_flight() in scsi_device_for_each_req_async() */ ++ blk_dec_in_flight(s->conf.blk); + } + + /* + * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() + * runs in the AioContext that is executing the request. ++ * Keeps the BlockBackend's in-flight counter incremented until everything is ++ * done, so draining it will settle all scheduled @fn() calls. + */ + static void scsi_device_for_each_req_async(SCSIDevice *s, + void (*fn)(SCSIRequest *, void *), +@@ -163,6 +164,8 @@ static void scsi_device_for_each_req_async(SCSIDevice *s, + */ + object_ref(OBJECT(s)); + ++ /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */ ++ blk_inc_in_flight(s->conf.blk); + aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), + scsi_device_for_each_req_async_bh, + data); +@@ -1728,11 +1731,20 @@ static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) + scsi_req_cancel_async(req, NULL); + } + ++/** ++ * Cancel all requests, and block until they are deleted. ++ */ + void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) + { + scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); + ++ /* ++ * Await all the scsi_device_purge_one_req() calls scheduled by ++ * scsi_device_for_each_req_async(), and all I/O requests that were ++ * cancelled this way, but may still take a bit of time to settle. 
++ */ + blk_drain(sdev->conf.blk); ++ + scsi_device_set_ua(sdev, sense); + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch b/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch new file mode 100644 index 0000000..6d43810 --- /dev/null +++ b/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch @@ -0,0 +1,88 @@ +From cd08d22a0da022d99fe6cfddb7de680abf66c8be Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:19:59 -0500 +Subject: [PATCH 082/101] scsi: assert that callbacks run in the correct + AioContext + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [13/26] d2fd5065c3b72d9d2f4e37efee39fe12eba0f0a9 (kmwolf/centos-qemu-kvm) + +Since the removal of AioContext locking, the correctness of the code +relies on running requests from a single AioContext at any given time. + +Add assertions that verify that callbacks are invoked in the correct +AioContext. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231205182011.1976568-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 14 ++++++++++++++ + system/dma-helpers.c | 3 +++ + 2 files changed, 17 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 2c1bbb3530..a5048e0aaf 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -273,6 +273,10 @@ static void scsi_aio_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ /* The request must only run in the BlockBackend's AioContext */ ++ assert(blk_get_aio_context(s->qdev.conf.blk) == ++ qemu_get_current_aio_context()); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -370,8 +374,13 @@ static void scsi_dma_complete(void *opaque, int ret) + + static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) + { ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint32_t n; + ++ /* The request must only run in the BlockBackend's AioContext */ ++ assert(blk_get_aio_context(s->qdev.conf.blk) == ++ qemu_get_current_aio_context()); ++ + assert(r->req.aiocb == NULL); + if (scsi_disk_req_check_error(r, ret, false)) { + goto done; +@@ -496,8 +505,13 @@ static void scsi_read_data(SCSIRequest *req) + + static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) + { ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint32_t n; + ++ /* The request must only run in the BlockBackend's AioContext */ ++ assert(blk_get_aio_context(s->qdev.conf.blk) == ++ qemu_get_current_aio_context()); ++ + assert (r->req.aiocb == NULL); + if (scsi_disk_req_check_error(r, ret, false)) { + goto done; +diff --git a/system/dma-helpers.c b/system/dma-helpers.c +index 528117f256..9b221cf94e 100644 +--- a/system/dma-helpers.c ++++ b/system/dma-helpers.c +@@ -119,6 +119,9 @@ static void dma_blk_cb(void *opaque, int ret) + + 
trace_dma_blk_cb(dbs, ret); + ++ /* DMAAIOCB is not thread-safe and must be accessed only from dbs->ctx */ ++ assert(ctx == qemu_get_current_aio_context()); ++ + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch b/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch deleted file mode 100644 index 11dda3a..0000000 --- a/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 5dd7d26c034c26b2d4d9b91b8d1a7b605e19730f Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:51 +0200 -Subject: [PATCH 02/12] scsi: cleanup scsi_clear_unit_attention() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/3] b3a06a91644e44fae3d76d0fbe72448652db517a (sgarzarella/qemu-kvm-c-9-s) - -The previous commit moved the unit attention clearing when we create -the request. So now we can clean scsi_clear_unit_attention() to handle -only the case of the REPORT LUNS command: this is the only case in -which a UNIT ATTENTION is cleared without having been reported. 
- -Suggested-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-3-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit ba947dab98e7cd4337c70975bd255701a2a6aad8) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 28 ++++++---------------------- - 1 file changed, 6 insertions(+), 22 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index 5d22313b9d..cecd26479e 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -828,26 +828,12 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - return; - } - -- if (req->dev->unit_attention.key != UNIT_ATTENTION && -- req->bus->unit_attention.key != UNIT_ATTENTION) { -- return; -- } -- -- /* -- * If an INQUIRY command enters the enabled command state, -- * the device server shall [not] clear any unit attention condition; -- * See also MMC-6, paragraphs 6.5 and 6.6.2. -- */ -- if (req->cmd.buf[0] == INQUIRY || -- req->cmd.buf[0] == GET_CONFIGURATION || -- req->cmd.buf[0] == GET_EVENT_STATUS_NOTIFICATION) { -- return; -- } -- - if (req->dev->unit_attention.key == UNIT_ATTENTION) { - ua = &req->dev->unit_attention; -- } else { -+ } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { - ua = &req->bus->unit_attention; -+ } else { -+ return; - } - - /* -@@ -856,12 +842,10 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. 
- */ - if (req->cmd.buf[0] == REPORT_LUNS && -- !(ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -- ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq)) { -- return; -+ ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -+ ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { -+ *ua = SENSE_CODE(NO_SENSE); - } -- -- *ua = SENSE_CODE(NO_SENSE); - } - - int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len) --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch b/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch deleted file mode 100644 index cb3b24e..0000000 --- a/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 0a784c45a7b7ee32c36bf86eebb24c8431a89f49 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:52 +0200 -Subject: [PATCH 03/12] scsi: clear unit attention only for REPORT LUNS - commands - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/3] 01d5e112ef9ae204d96ceb01b4a453fdb4e8b669 (sgarzarella/qemu-kvm-c-9-s) - -scsi_clear_unit_attention() now only handles REPORTED LUNS DATA HAS -CHANGED. - -This only happens when we handle REPORT LUNS commands, so let's rename -the function in scsi_clear_reported_luns_changed() and call it only in -scsi_target_emulate_report_luns(). 
- -Suggested-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-4-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 2eb5599e8a73e70a9e86a97120818ff95a43a23a) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 34 +++++++++++----------------------- - 1 file changed, 11 insertions(+), 23 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index cecd26479e..9542410800 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -22,6 +22,7 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev); - static void scsi_req_dequeue(SCSIRequest *req); - static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len); - static void scsi_target_free_buf(SCSIRequest *req); -+static void scsi_clear_reported_luns_changed(SCSIRequest *req); - - static int next_scsi_bus; - -@@ -518,6 +519,14 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r) - - /* store the LUN list length */ - stl_be_p(&r->buf[0], len - 8); -+ -+ /* -+ * If a REPORT LUNS command enters the enabled command state, [...] -+ * the device server shall clear any pending unit attention condition -+ * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. -+ */ -+ scsi_clear_reported_luns_changed(&r->req); -+ - return true; - } - -@@ -816,18 +825,10 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) - return req->ops->get_buf(req); - } - --static void scsi_clear_unit_attention(SCSIRequest *req) -+static void scsi_clear_reported_luns_changed(SCSIRequest *req) - { - SCSISense *ua; - -- /* -- * scsi_fetch_unit_attention_sense() already cleaned the unit attention -- * in this case. 
-- */ -- if (req->ops == &reqops_unit_attention) { -- return; -- } -- - if (req->dev->unit_attention.key == UNIT_ATTENTION) { - ua = &req->dev->unit_attention; - } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { -@@ -836,13 +837,7 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - return; - } - -- /* -- * If a REPORT LUNS command enters the enabled command state, [...] -- * the device server shall clear any pending unit attention condition -- * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. -- */ -- if (req->cmd.buf[0] == REPORT_LUNS && -- ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -+ if (ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && - ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { - *ua = SENSE_CODE(NO_SENSE); - } -@@ -1528,13 +1523,6 @@ void scsi_req_complete(SCSIRequest *req, int status) - req->dev->sense_is_ua = false; - } - -- /* -- * Unit attention state is now stored in the device's sense buffer -- * if the HBA didn't do autosense. Clear the pending unit attention -- * flags. 
-- */ -- scsi_clear_unit_attention(req); -- - scsi_req_ref(req); - scsi_req_dequeue(req); - req->bus->info->complete(req, req->residual); --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch b/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch new file mode 100644 index 0000000..65b08ce --- /dev/null +++ b/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch @@ -0,0 +1,245 @@ +From d1d384bd24a7aeb527f4abd8a0958146544ef9bb Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Dec 2023 11:42:58 -0500 +Subject: [PATCH 079/101] scsi: don't lock AioContext in I/O code path + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [10/26] b5814cec94af5c254e300646d8783672b085bac3 (kmwolf/centos-qemu-kvm) + +blk_aio_*() doesn't require the AioContext lock and the SCSI subsystem's +internal state also does not anymore. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Acked-by: Kevin Wolf +Message-ID: <20231204164259.1515217-4-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 23 ----------------------- + hw/scsi/scsi-generic.c | 20 +++----------------- + 2 files changed, 3 insertions(+), 40 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 6691f5edb8..2c1bbb3530 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -273,8 +273,6 @@ static void scsi_aio_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -286,7 +284,6 @@ static void scsi_aio_complete(void *opaque, int ret) + scsi_req_complete(&r->req, GOOD); + + done: +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + scsi_req_unref(&r->req); + } + +@@ 
-394,8 +391,6 @@ static void scsi_read_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -406,7 +401,6 @@ static void scsi_read_complete(void *opaque, int ret) + trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); + } + scsi_read_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + /* Actually issue a read to the block device. */ +@@ -448,8 +442,6 @@ static void scsi_do_read_cb(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -459,7 +451,6 @@ static void scsi_do_read_cb(void *opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_do_read(opaque, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + /* Read more data from scsi device into buffer. 
*/ +@@ -533,8 +524,6 @@ static void scsi_write_complete(void * opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -544,7 +533,6 @@ static void scsi_write_complete(void * opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_write_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void scsi_write_data(SCSIRequest *req) +@@ -1742,8 +1730,6 @@ static void scsi_unmap_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -1754,7 +1740,6 @@ static void scsi_unmap_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + scsi_unmap_complete_noio(data, ret); + } +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf) +@@ -1822,8 +1807,6 @@ static void scsi_write_same_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -1847,7 +1830,6 @@ static void scsi_write_same_complete(void *opaque, int ret) + data->sector << BDRV_SECTOR_BITS, + &data->qiov, 0, + scsi_write_same_complete, data); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + return; + } + +@@ -1857,7 +1839,6 @@ done: + scsi_req_unref(&r->req); + qemu_vfree(data->iov.iov_base); + g_free(data); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void 
scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) +@@ -2810,7 +2791,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + { + SCSIBlockReq *req = (SCSIBlockReq *)opaque; + SCSIDiskReq *r = &req->req; +- SCSIDevice *s = r->req.dev; + sg_io_hdr_t *io_hdr = &req->io_header; + + if (ret == 0) { +@@ -2827,13 +2807,10 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + } + + if (ret > 0) { +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); + if (scsi_handle_rw_error(r, ret, true)) { +- aio_context_release(blk_get_aio_context(s->conf.blk)); + scsi_req_unref(&r->req); + return; + } +- aio_context_release(blk_get_aio_context(s->conf.blk)); + + /* Ignore error. */ + ret = 0; +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 2417f0ad84..b7b04e1d63 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -109,15 +109,11 @@ done: + static void scsi_command_complete(void *opaque, int ret) + { + SCSIGenericReq *r = (SCSIGenericReq *)opaque; +- SCSIDevice *s = r->req.dev; +- +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + scsi_command_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->conf.blk)); + } + + static int execute_command(BlockBackend *blk, +@@ -274,14 +270,12 @@ static void scsi_read_complete(void * opaque, int ret) + SCSIDevice *s = r->req.dev; + int len; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); +- goto done; ++ return; + } + + len = r->io_header.dxfer_len - r->io_header.resid; +@@ -320,7 +314,7 @@ static void scsi_read_complete(void * opaque, int ret) + r->io_header.status != GOOD || + len == 0) { + scsi_command_complete_noio(r, 0); +- goto done; ++ return; + } + + /* Snoop READ CAPACITY output to set the blocksize. 
*/ +@@ -356,9 +350,6 @@ static void scsi_read_complete(void * opaque, int ret) + req_complete: + scsi_req_data(&r->req, len); + scsi_req_unref(&r->req); +- +-done: +- aio_context_release(blk_get_aio_context(s->conf.blk)); + } + + /* Read more data from scsi device into buffer. */ +@@ -391,14 +382,12 @@ static void scsi_write_complete(void * opaque, int ret) + + trace_scsi_generic_write_complete(ret); + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); +- goto done; ++ return; + } + + if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 && +@@ -408,9 +397,6 @@ static void scsi_write_complete(void * opaque, int ret) + } + + scsi_command_complete_noio(r, ret); +- +-done: +- aio_context_release(blk_get_aio_context(s->conf.blk)); + } + + /* Write data to a scsi device. Returns nonzero on failure. +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch b/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch deleted file mode 100644 index a41ae82..0000000 --- a/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 562ea3a2d602cf41c548f3ddf52c43c04fded347 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:50 +0200 -Subject: [PATCH 01/12] scsi: fetch unit attention when creating the request - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/3] 04563caac45d0110ea65eda8e55472556cd317c0 (sgarzarella/qemu-kvm-c-9-s) - -Commit 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") split -calls to scsi_req_new() and scsi_req_enqueue() in the virtio-scsi device. 
-No ill effects were observed until commit 8cc5583abe ("virtio-scsi: Send -"REPORTED LUNS CHANGED" sense data upon disk hotplug events") added a -unit attention that was easy to trigger with device hotplug and -hot-unplug. - -Because the two calls were separated, all requests in the batch were -prepared calling scsi_req_new() to report a sense. The first one -submitted would report the right sense and reset it to NO_SENSE, while -the others reported CHECK_CONDITION with no sense data. This caused -SCSI errors in Linux. - -To solve this issue, let's fetch the unit attention as early as possible -when we prepare the request, so that only the first request in the batch -will use the unit attention SCSIReqOps and the others will not report -CHECK CONDITION. - -Fixes: 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") -Fixes: 8cc5583abe ("virtio-scsi: Send "REPORTED LUNS CHANGED" sense data upon disk hotplug events") -Reported-by: Thomas Huth -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2176702 -Co-developed-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-2-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9472083e642bfb9bc836b38662baddd9bc964ebc) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 36 +++++++++++++++++++++++++++++++++--- - include/hw/scsi/scsi.h | 1 + - 2 files changed, 34 insertions(+), 3 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index 3c20b47ad0..5d22313b9d 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -413,19 +413,35 @@ static const struct SCSIReqOps reqops_invalid_opcode = { - - /* SCSIReqOps implementation for unit attention conditions. 
*/ - --static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) -+static void scsi_fetch_unit_attention_sense(SCSIRequest *req) - { -+ SCSISense *ua = NULL; -+ - if (req->dev->unit_attention.key == UNIT_ATTENTION) { -- scsi_req_build_sense(req, req->dev->unit_attention); -+ ua = &req->dev->unit_attention; - } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { -- scsi_req_build_sense(req, req->bus->unit_attention); -+ ua = &req->bus->unit_attention; - } -+ -+ /* -+ * Fetch the unit attention sense immediately so that another -+ * scsi_req_new does not use reqops_unit_attention. -+ */ -+ if (ua) { -+ scsi_req_build_sense(req, *ua); -+ *ua = SENSE_CODE(NO_SENSE); -+ } -+} -+ -+static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) -+{ - scsi_req_complete(req, CHECK_CONDITION); - return 0; - } - - static const struct SCSIReqOps reqops_unit_attention = { - .size = sizeof(SCSIRequest), -+ .init_req = scsi_fetch_unit_attention_sense, - .send_command = scsi_unit_attention - }; - -@@ -699,6 +715,11 @@ SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d, - object_ref(OBJECT(d)); - object_ref(OBJECT(qbus->parent)); - notifier_list_init(&req->cancel_notifiers); -+ -+ if (reqops->init_req) { -+ reqops->init_req(req); -+ } -+ - trace_scsi_req_alloc(req->dev->id, req->lun, req->tag); - return req; - } -@@ -798,6 +819,15 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) - static void scsi_clear_unit_attention(SCSIRequest *req) - { - SCSISense *ua; -+ -+ /* -+ * scsi_fetch_unit_attention_sense() already cleaned the unit attention -+ * in this case. 
-+ */ -+ if (req->ops == &reqops_unit_attention) { -+ return; -+ } -+ - if (req->dev->unit_attention.key != UNIT_ATTENTION && - req->bus->unit_attention.key != UNIT_ATTENTION) { - return; -diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h -index 6f23a7a73e..1787ddd01e 100644 ---- a/include/hw/scsi/scsi.h -+++ b/include/hw/scsi/scsi.h -@@ -108,6 +108,7 @@ int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num); - /* scsi-bus.c */ - struct SCSIReqOps { - size_t size; -+ void (*init_req)(SCSIRequest *req); - void (*free_req)(SCSIRequest *req); - int32_t (*send_command)(SCSIRequest *req, uint8_t *buf); - void (*read_data)(SCSIRequest *req); --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch b/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch new file mode 100644 index 0000000..30f1c00 --- /dev/null +++ b/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch @@ -0,0 +1,307 @@ +From 42dd1357310bd1a68d6cacaa53cd5b1d1b02880d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Dec 2023 11:42:56 -0500 +Subject: [PATCH 077/101] scsi: only access SCSIDevice->requests from one + thread + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [8/26] 9df662e82a63e93d184b5763bebbe7e43bc9dabe (kmwolf/centos-qemu-kvm) + +Stop depending on the AioContext lock and instead access +SCSIDevice->requests from only one thread at a time: +- When the VM is running only the BlockBackend's AioContext may access + the requests list. +- When the VM is stopped only the main loop may access the requests + list. + +These constraints protect the requests list without the need for locking +in the I/O code path. + +Note that multiple IOThreads are not supported yet because the code +assumes all SCSIRequests are executed from a single AioContext. 
Leave +that as future work. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231204164259.1515217-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-bus.c | 181 ++++++++++++++++++++++++++++------------- + include/hw/scsi/scsi.h | 7 +- + 2 files changed, 131 insertions(+), 57 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index fc4b77fdb0..b649cdf555 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -85,6 +85,89 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun) + return d; + } + ++/* ++ * Invoke @fn() for each enqueued request in device @s. Must be called from the ++ * main loop thread while the guest is stopped. This is only suitable for ++ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases. ++ */ ++static void scsi_device_for_each_req_sync(SCSIDevice *s, ++ void (*fn)(SCSIRequest *, void *), ++ void *opaque) ++{ ++ SCSIRequest *req; ++ SCSIRequest *next_req; ++ ++ assert(!runstate_is_running()); ++ assert(qemu_in_main_thread()); ++ ++ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) { ++ fn(req, opaque); ++ } ++} ++ ++typedef struct { ++ SCSIDevice *s; ++ void (*fn)(SCSIRequest *, void *); ++ void *fn_opaque; ++} SCSIDeviceForEachReqAsyncData; ++ ++static void scsi_device_for_each_req_async_bh(void *opaque) ++{ ++ g_autofree SCSIDeviceForEachReqAsyncData *data = opaque; ++ SCSIDevice *s = data->s; ++ AioContext *ctx; ++ SCSIRequest *req; ++ SCSIRequest *next; ++ ++ /* ++ * If the AioContext changed before this BH was called then reschedule into ++ * the new AioContext before accessing ->requests. This can happen when ++ * scsi_device_for_each_req_async() is called and then the AioContext is ++ * changed before BHs are run. 
++ */ ++ ctx = blk_get_aio_context(s->conf.blk); ++ if (ctx != qemu_get_current_aio_context()) { ++ aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, ++ g_steal_pointer(&data)); ++ return; ++ } ++ ++ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { ++ data->fn(req, data->fn_opaque); ++ } ++ ++ /* Drop the reference taken by scsi_device_for_each_req_async() */ ++ object_unref(OBJECT(s)); ++} ++ ++/* ++ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() ++ * runs in the AioContext that is executing the request. ++ */ ++static void scsi_device_for_each_req_async(SCSIDevice *s, ++ void (*fn)(SCSIRequest *, void *), ++ void *opaque) ++{ ++ assert(qemu_in_main_thread()); ++ ++ SCSIDeviceForEachReqAsyncData *data = ++ g_new(SCSIDeviceForEachReqAsyncData, 1); ++ ++ data->s = s; ++ data->fn = fn; ++ data->fn_opaque = opaque; ++ ++ /* ++ * Hold a reference to the SCSIDevice until ++ * scsi_device_for_each_req_async_bh() finishes. ++ */ ++ object_ref(OBJECT(s)); ++ ++ aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), ++ scsi_device_for_each_req_async_bh, ++ data); ++} ++ + static void scsi_device_realize(SCSIDevice *s, Error **errp) + { + SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s); +@@ -144,20 +227,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, + qbus_set_bus_hotplug_handler(BUS(bus)); + } + +-static void scsi_dma_restart_bh(void *opaque) ++void scsi_req_retry(SCSIRequest *req) + { +- SCSIDevice *s = opaque; +- SCSIRequest *req, *next; +- +- qemu_bh_delete(s->bh); +- s->bh = NULL; ++ req->retry = true; ++} + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { +- scsi_req_ref(req); +- if (req->retry) { +- req->retry = false; +- switch (req->cmd.mode) { ++/* Called in the AioContext that is executing the request */ ++static void scsi_dma_restart_req(SCSIRequest *req, void *opaque) ++{ ++ scsi_req_ref(req); ++ if (req->retry) { 
++ req->retry = false; ++ switch (req->cmd.mode) { + case SCSI_XFER_FROM_DEV: + case SCSI_XFER_TO_DEV: + scsi_req_continue(req); +@@ -166,37 +247,22 @@ static void scsi_dma_restart_bh(void *opaque) + scsi_req_dequeue(req); + scsi_req_enqueue(req); + break; +- } + } +- scsi_req_unref(req); + } +- aio_context_release(blk_get_aio_context(s->conf.blk)); +- /* Drop the reference that was acquired in scsi_dma_restart_cb */ +- object_unref(OBJECT(s)); +-} +- +-void scsi_req_retry(SCSIRequest *req) +-{ +- /* No need to save a reference, because scsi_dma_restart_bh just +- * looks at the request list. */ +- req->retry = true; ++ scsi_req_unref(req); + } + + static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) + { + SCSIDevice *s = opaque; + ++ assert(qemu_in_main_thread()); ++ + if (!running) { + return; + } +- if (!s->bh) { +- AioContext *ctx = blk_get_aio_context(s->conf.blk); +- /* The reference is dropped in scsi_dma_restart_bh.*/ +- object_ref(OBJECT(s)); +- s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, +- &DEVICE(s)->mem_reentrancy_guard); +- qemu_bh_schedule(s->bh); +- } ++ ++ scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL); + } + + static bool scsi_bus_is_address_free(SCSIBus *bus, +@@ -1657,15 +1723,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense) + } + } + ++static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) ++{ ++ scsi_req_cancel_async(req, NULL); ++} ++ + void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) + { +- SCSIRequest *req; ++ scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); + + aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); +- while (!QTAILQ_EMPTY(&sdev->requests)) { +- req = QTAILQ_FIRST(&sdev->requests); +- scsi_req_cancel_async(req, NULL); +- } + blk_drain(sdev->conf.blk); + aio_context_release(blk_get_aio_context(sdev->conf.blk)); + scsi_device_set_ua(sdev, sense); +@@ -1737,31 +1804,33 @@ static char 
*scsibus_get_fw_dev_path(DeviceState *dev) + + /* SCSI request list. For simplicity, pv points to the whole device */ + ++static void put_scsi_req(SCSIRequest *req, void *opaque) ++{ ++ QEMUFile *f = opaque; ++ ++ assert(!req->io_canceled); ++ assert(req->status == -1 && req->host_status == -1); ++ assert(req->enqueued); ++ ++ qemu_put_sbyte(f, req->retry ? 1 : 2); ++ qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); ++ qemu_put_be32s(f, &req->tag); ++ qemu_put_be32s(f, &req->lun); ++ if (req->bus->info->save_request) { ++ req->bus->info->save_request(f, req); ++ } ++ if (req->ops->save_request) { ++ req->ops->save_request(f, req); ++ } ++} ++ + static int put_scsi_requests(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) + { + SCSIDevice *s = pv; +- SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus); +- SCSIRequest *req; + +- QTAILQ_FOREACH(req, &s->requests, next) { +- assert(!req->io_canceled); +- assert(req->status == -1 && req->host_status == -1); +- assert(req->enqueued); +- +- qemu_put_sbyte(f, req->retry ? 
1 : 2); +- qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); +- qemu_put_be32s(f, &req->tag); +- qemu_put_be32s(f, &req->lun); +- if (bus->info->save_request) { +- bus->info->save_request(f, req); +- } +- if (req->ops->save_request) { +- req->ops->save_request(f, req); +- } +- } ++ scsi_device_for_each_req_sync(s, put_scsi_req, f); + qemu_put_sbyte(f, 0); +- + return 0; + } + +diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h +index 3692ca82f3..10c4e8288d 100644 +--- a/include/hw/scsi/scsi.h ++++ b/include/hw/scsi/scsi.h +@@ -69,14 +69,19 @@ struct SCSIDevice + { + DeviceState qdev; + VMChangeStateEntry *vmsentry; +- QEMUBH *bh; + uint32_t id; + BlockConf conf; + SCSISense unit_attention; + bool sense_is_ua; + uint8_t sense[SCSI_SENSE_BUF_SIZE]; + uint32_t sense_len; ++ ++ /* ++ * The requests list is only accessed from the AioContext that executes ++ * requests or from the main loop when IOThread processing is stopped. ++ */ + QTAILQ_HEAD(, SCSIRequest) requests; ++ + uint32_t channel; + uint32_t lun; + int blocksize; +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-remove-AioContext-locking.patch b/SOURCES/kvm-scsi-remove-AioContext-locking.patch new file mode 100644 index 0000000..34a5e46 --- /dev/null +++ b/SOURCES/kvm-scsi-remove-AioContext-locking.patch @@ -0,0 +1,280 @@ +From 61d605433a5edfcc7fe836fd399106ed1e1907bb Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:05 -0500 +Subject: [PATCH 088/101] scsi: remove AioContext locking + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [19/26] 12a8e26670074a17dd2b0cfac06e0aea03b3068f (kmwolf/centos-qemu-kvm) + +The AioContext lock no longer has any effect. Remove it. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-9-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-bus.c | 2 -- + hw/scsi/scsi-disk.c | 31 +++++-------------------------- + hw/scsi/virtio-scsi.c | 18 ------------------ + include/hw/virtio/virtio-scsi.h | 14 -------------- + 4 files changed, 5 insertions(+), 60 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index b649cdf555..5b08cbf60a 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -1732,9 +1732,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) + { + scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); + +- aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); + blk_drain(sdev->conf.blk); +- aio_context_release(blk_get_aio_context(sdev->conf.blk)); + scsi_device_set_ua(sdev, sense); + } + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index a5048e0aaf..61be3d395a 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2339,14 +2339,10 @@ static void scsi_disk_reset(DeviceState *dev) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev); + uint64_t nb_sectors; +- AioContext *ctx; + + scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET)); + +- ctx = blk_get_aio_context(s->qdev.conf.blk); +- aio_context_acquire(ctx); + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); +- aio_context_release(ctx); + + nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE; + if (nb_sectors) { +@@ -2545,15 +2541,13 @@ static void scsi_unrealize(SCSIDevice *dev) + static void scsi_hd_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); +- AioContext *ctx = NULL; ++ + /* can happen for devices without drive. 
The error message for missing + * backend will be issued in scsi_realize + */ + if (s->qdev.conf.blk) { +- ctx = blk_get_aio_context(s->qdev.conf.blk); +- aio_context_acquire(ctx); + if (!blkconf_blocksizes(&s->qdev.conf, errp)) { +- goto out; ++ return; + } + } + s->qdev.blocksize = s->qdev.conf.logical_block_size; +@@ -2562,16 +2556,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) + s->product = g_strdup("QEMU HARDDISK"); + } + scsi_realize(&s->qdev, errp); +-out: +- if (ctx) { +- aio_context_release(ctx); +- } + } + + static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); +- AioContext *ctx; + int ret; + uint32_t blocksize = 2048; + +@@ -2587,8 +2576,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + blocksize = dev->conf.physical_block_size; + } + +- ctx = blk_get_aio_context(dev->conf.blk); +- aio_context_acquire(ctx); + s->qdev.blocksize = blocksize; + s->qdev.type = TYPE_ROM; + s->features |= 1 << SCSI_DISK_F_REMOVABLE; +@@ -2596,7 +2583,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + s->product = g_strdup("QEMU CD-ROM"); + } + scsi_realize(&s->qdev, errp); +- aio_context_release(ctx); + } + + +@@ -2727,7 +2713,6 @@ static int get_device_type(SCSIDiskState *s) + static void scsi_block_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); +- AioContext *ctx; + int sg_version; + int rc; + +@@ -2742,9 +2727,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + "be removed in a future version"); + } + +- ctx = blk_get_aio_context(s->qdev.conf.blk); +- aio_context_acquire(ctx); +- + /* check we are using a driver managing SG_IO (version 3 and after) */ + rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version); + if (rc < 0) { +@@ -2752,18 +2734,18 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + if (rc != -EPERM) { + error_append_hint(errp, "Is this a SCSI 
device?\n"); + } +- goto out; ++ return; + } + if (sg_version < 30000) { + error_setg(errp, "scsi generic interface too old"); +- goto out; ++ return; + } + + /* get device type from INQUIRY data */ + rc = get_device_type(s); + if (rc < 0) { + error_setg(errp, "INQUIRY failed"); +- goto out; ++ return; + } + + /* Make a guess for the block size, we'll fix it when the guest sends. +@@ -2783,9 +2765,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + + scsi_realize(&s->qdev, errp); + scsi_generic_read_device_inquiry(&s->qdev); +- +-out: +- aio_context_release(ctx); + } + + typedef struct SCSIBlockReq { +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 4f8d35facc..ca365a70e9 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -642,9 +642,7 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + return; + } + +- virtio_scsi_acquire(s); + virtio_scsi_handle_ctrl_vq(s, vq); +- virtio_scsi_release(s); + } + + static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req) +@@ -882,9 +880,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) + return; + } + +- virtio_scsi_acquire(s); + virtio_scsi_handle_cmd_vq(s, vq); +- virtio_scsi_release(s); + } + + static void virtio_scsi_get_config(VirtIODevice *vdev, +@@ -1031,9 +1027,7 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) + return; + } + +- virtio_scsi_acquire(s); + virtio_scsi_handle_event_vq(s, vq); +- virtio_scsi_release(s); + } + + static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) +@@ -1052,9 +1046,7 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) + }, + }; + +- virtio_scsi_acquire(s); + virtio_scsi_push_event(s, &info); +- virtio_scsi_release(s); + } + } + +@@ -1071,17 +1063,13 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); + VirtIOSCSI *s = 
VIRTIO_SCSI(vdev); + SCSIDevice *sd = SCSI_DEVICE(dev); +- AioContext *old_context; + int ret; + + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; + } +- old_context = blk_get_aio_context(sd->conf.blk); +- aio_context_acquire(old_context); + ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); +- aio_context_release(old_context); + if (ret < 0) { + return; + } +@@ -1097,10 +1085,8 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + }, + }; + +- virtio_scsi_acquire(s); + virtio_scsi_push_event(s, &info); + scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); +- virtio_scsi_release(s); + } + } + +@@ -1122,17 +1108,13 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, + qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); + + if (s->ctx) { +- virtio_scsi_acquire(s); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); +- virtio_scsi_release(s); + } + + if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { +- virtio_scsi_acquire(s); + virtio_scsi_push_event(s, &info); + scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); +- virtio_scsi_release(s); + } + } + +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index da8cb928d9..7f0573b1bf 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -101,20 +101,6 @@ struct VirtIOSCSI { + uint32_t host_features; + }; + +-static inline void virtio_scsi_acquire(VirtIOSCSI *s) +-{ +- if (s->ctx) { +- aio_context_acquire(s->ctx); +- } +-} +- +-static inline void virtio_scsi_release(VirtIOSCSI *s) +-{ +- if (s->ctx) { +- aio_context_release(s->ctx); +- } +-} +- + void virtio_scsi_common_realize(DeviceState *dev, + VirtIOHandleOutput ctrl, + VirtIOHandleOutput evt, +-- +2.39.3 + diff --git 
a/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch b/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch new file mode 100644 index 0000000..c9baf60 --- /dev/null +++ b/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch @@ -0,0 +1,41 @@ +From 9f5c6dbe907fe6227006ab51179eaa50a63559cb Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:09 -0500 +Subject: [PATCH 092/101] scsi: remove outdated AioContext lock comment + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [23/26] 96e2e7d2e6a160ce4d695060f902d21030b3b1d8 (kmwolf/centos-qemu-kvm) + +The SCSI subsystem no longer uses the AioContext lock. Request +processing runs exclusively in the BlockBackend's AioContext since +"scsi: only access SCSIDevice->requests from one thread" and hence the +lock is unnecessary. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-13-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 61be3d395a..2e7e1e9a1c 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -355,7 +355,6 @@ done: + scsi_req_unref(&r->req); + } + +-/* Called with AioContext lock held */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch b/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch new file mode 100644 index 0000000..a9d24a0 --- /dev/null +++ b/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch @@ -0,0 +1,62 @@ +From 93cf5b82771f1d1e8182be168dae7a45d42069e9 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 4 Mar 2024 15:39:57 +0100 +Subject: [PATCH 11/20] smbios: add 
smbios_add_usr_blob_size() helper + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [9/18] 8c698fb9e186d2b1d2b7f75a74305f356450ad68 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +It will be used by a follow-up patch when legacy handling +is moved out into a separate file. + +Signed-off-by: Igor Mammedov +Reviewed-by: Ani Sinha +--- + hw/smbios/smbios.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 441517cf24..c48a290478 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1426,6 +1426,14 @@ static bool save_opt_list(size_t *ndest, char ***dest, QemuOpts *opts, + return true; + } + ++static void smbios_add_usr_blob_size(size_t size) ++{ ++ if (!usr_blobs_sizes) { ++ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); ++ } ++ g_array_append_val(usr_blobs_sizes, size); ++} ++ + void smbios_entry_add(QemuOpts *opts, Error **errp) + { + const char *val; +@@ -1473,10 +1481,12 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + smbios_type4_count++; + } + +- if (!usr_blobs_sizes) { +- usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); +- } +- g_array_append_val(usr_blobs_sizes, size); ++ /* ++ * preserve blob size for legacy mode so it could build its ++ * blobs flavor from 'usr_blobs' ++ */ ++ smbios_add_usr_blob_size(size); ++ + usr_blobs_len += size; + if (size > usr_table_max) { + usr_table_max = size; +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch b/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch new file mode 100644 index 0000000..b8e4d92 --- /dev/null +++ b/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch @@ -0,0 +1,309 @@ +From 15d293b706ca6c9e6ad569becda8da5f70461c30 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date:
Tue, 13 Feb 2024 09:25:11 +0100 +Subject: [PATCH 09/20] smbios: avoid mangling user provided tables + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [7/18] fa2c7372f55f29e5834eee94ba98f19ea02e7a82 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + currently smbios_entry_add() preserves internally '-smbios type=' + options but tables provided with '-smbios file=' are stored directly + into blob that eventually will be exposed to VM. And then later + QEMU adds default/'-smbios type' entries on top into the same blob. + + It makes impossible to generate tables more than once, hence + 'immutable' guard was used. + Make it possible to regenerate final blob by storing user provided + blobs into a dedicated area (usr_blobs) and then copy it when + composing final blob. Which also makes handling of -smbios + options consistent. + + As side effect of this and previous commits there is no need to + generate legacy smbios_entries at the time options are parsed. + Instead compose smbios_entries on demand from usr_blobs like + it is done for non-legacy SMBIOS tables. 
+ + Signed-off-by: Igor Mammedov + Tested-by: Fiona Ebner + Reviewed-by: Ani Sinha + +Conflicts: hw/smbios/smbios.c + caused by downstream smbios_type2_required + +Signed-off-by: Igor Mammedov +--- + hw/smbios/smbios.c | 181 +++++++++++++++++++++++---------------------- + 1 file changed, 93 insertions(+), 88 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 0c8c439859..d8d68716d4 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -60,6 +60,14 @@ static bool smbios_uuid_encoded = true; + /* Set to true for modern Windows 10 HardwareID-6 compat */ + static bool smbios_type2_required; + ++/* ++ * SMBIOS tables provided by user with '-smbios file=' option ++ */ ++uint8_t *usr_blobs; ++size_t usr_blobs_len; ++static GArray *usr_blobs_sizes; ++static unsigned usr_table_max; ++static unsigned usr_table_cnt; + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -70,7 +78,6 @@ static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; + static SmbiosEntryPoint ep; + + static int smbios_type4_count = 0; +-static bool smbios_immutable; + static bool smbios_have_defaults; + static uint32_t smbios_cpuid_version, smbios_cpuid_features; + +@@ -617,9 +624,8 @@ static void smbios_build_type_1_fields(void) + + uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + { +- /* drop unwanted version of command-line file blob(s) */ +- g_free(smbios_tables); +- smbios_tables = NULL; ++ int i; ++ size_t usr_offset; + + /* also complain if fields were given for types > 1 */ + if (find_next_bit(have_fields_bitmap, +@@ -629,12 +635,33 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + exit(1); + } + +- if (!smbios_immutable) { +- smbios_build_type_0_fields(); +- smbios_build_type_1_fields(); +- smbios_validate_table(expected_t4_count); +- smbios_immutable = true; ++ g_free(smbios_entries); ++ smbios_entries_len = sizeof(uint16_t); ++ smbios_entries = g_malloc0(smbios_entries_len); ++ 
++ for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; ++ i++) ++ { ++ struct smbios_table *table; ++ struct smbios_structure_header *header; ++ size_t size = g_array_index(usr_blobs_sizes, size_t, i); ++ ++ header = (struct smbios_structure_header *)(usr_blobs + usr_offset); ++ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + ++ size + sizeof(*table)); ++ table = (struct smbios_table *)(smbios_entries + smbios_entries_len); ++ table->header.type = SMBIOS_TABLE_ENTRY; ++ table->header.length = cpu_to_le16(sizeof(*table) + size); ++ memcpy(table->data, header, size); ++ smbios_entries_len += sizeof(*table) + size; ++ (*(uint16_t *)smbios_entries) = ++ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); ++ usr_offset += size; + } ++ ++ smbios_build_type_0_fields(); ++ smbios_build_type_1_fields(); ++ smbios_validate_table(expected_t4_count); + *length = smbios_entries_len; + return smbios_entries; + } +@@ -1232,68 +1259,68 @@ void smbios_get_tables(MachineState *ms, + { + unsigned i, dimm_cnt, offset; + +- /* drop unwanted (legacy) version of command-line file blob(s) */ +- g_free(smbios_entries); +- smbios_entries = NULL; ++ g_free(smbios_tables); ++ smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); ++ smbios_tables_len = usr_blobs_len; ++ smbios_table_max = usr_table_max; ++ smbios_table_cnt = usr_table_cnt; + +- if (!smbios_immutable) { +- smbios_build_type_0_table(); +- smbios_build_type_1_table(); +- smbios_build_type_2_table(); +- smbios_build_type_3_table(); ++ smbios_build_type_0_table(); ++ smbios_build_type_1_table(); ++ smbios_build_type_2_table(); ++ smbios_build_type_3_table(); + +- assert(ms->smp.sockets >= 1); ++ assert(ms->smp.sockets >= 1); + +- for (i = 0; i < ms->smp.sockets; i++) { +- smbios_build_type_4_table(ms, i); +- } ++ for (i = 0; i < ms->smp.sockets; i++) { ++ smbios_build_type_4_table(ms, i); ++ } + +- smbios_build_type_8_table(); +- smbios_build_type_9_table(errp); +- 
smbios_build_type_11_table(); ++ smbios_build_type_8_table(); ++ smbios_build_type_9_table(errp); ++ smbios_build_type_11_table(); + + #define MAX_DIMM_SZ (16 * GiB) + #define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \ + : ((current_machine->ram_size - 1) % MAX_DIMM_SZ) + 1) + +- dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ; ++ dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / ++ MAX_DIMM_SZ; + +- /* +- * The offset determines if we need to keep additional space between +- * table 17 and table 19 header handle numbers so that they do +- * not overlap. For example, for a VM with larger than 8 TB guest +- * memory and DIMM like chunks of 16 GiB, the default space between +- * the two tables (T19_BASE - T17_BASE = 512) is not enough. +- */ +- offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \ +- dimm_cnt - (T19_BASE - T17_BASE) : 0; ++ /* ++ * The offset determines if we need to keep additional space between ++ * table 17 and table 19 header handle numbers so that they do ++ * not overlap. For example, for a VM with larger than 8 TB guest ++ * memory and DIMM like chunks of 16 GiB, the default space between ++ * the two tables (T19_BASE - T17_BASE = 512) is not enough. ++ */ ++ offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \ ++ dimm_cnt - (T19_BASE - T17_BASE) : 0; + +- smbios_build_type_16_table(dimm_cnt); ++ smbios_build_type_16_table(dimm_cnt); + +- for (i = 0; i < dimm_cnt; i++) { +- smbios_build_type_17_table(i, GET_DIMM_SZ); +- } ++ for (i = 0; i < dimm_cnt; i++) { ++ smbios_build_type_17_table(i, GET_DIMM_SZ); ++ } + +- for (i = 0; i < mem_array_size; i++) { +- smbios_build_type_19_table(i, offset, mem_array[i].address, +- mem_array[i].length); +- } ++ for (i = 0; i < mem_array_size; i++) { ++ smbios_build_type_19_table(i, offset, mem_array[i].address, ++ mem_array[i].length); ++ } + +- /* +- * make sure 16 bit handle numbers in the headers of tables 19 +- * and 32 do not overlap. 
+- */ +- assert((mem_array_size + offset) < (T32_BASE - T19_BASE)); ++ /* ++ * make sure 16 bit handle numbers in the headers of tables 19 ++ * and 32 do not overlap. ++ */ ++ assert((mem_array_size + offset) < (T32_BASE - T19_BASE)); + +- smbios_build_type_32_table(); +- smbios_build_type_38_table(); +- smbios_build_type_41_table(errp); +- smbios_build_type_127_table(); ++ smbios_build_type_32_table(); ++ smbios_build_type_38_table(); ++ smbios_build_type_41_table(errp); ++ smbios_build_type_127_table(); + +- smbios_validate_table(ms->smp.sockets); +- smbios_entry_point_setup(); +- smbios_immutable = true; +- } ++ smbios_validate_table(ms->smp.sockets); ++ smbios_entry_point_setup(); + + /* return tables blob and entry point (anchor), and their sizes */ + *tables = smbios_tables; +@@ -1393,13 +1420,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + { + const char *val; + +- assert(!smbios_immutable); +- + val = qemu_opt_get(opts, "file"); + if (val) { + struct smbios_structure_header *header; +- int size; +- struct smbios_table *table; /* legacy mode only */ ++ size_t size; + + if (!qemu_opts_validate(opts, qemu_smbios_file_opts, errp)) { + return; +@@ -1416,9 +1440,9 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + * (except in legacy mode, where the second '\0' is implicit and + * will be inserted by the BIOS). 
+ */ +- smbios_tables = g_realloc(smbios_tables, smbios_tables_len + size); +- header = (struct smbios_structure_header *)(smbios_tables + +- smbios_tables_len); ++ usr_blobs = g_realloc(usr_blobs, usr_blobs_len + size); ++ header = (struct smbios_structure_header *)(usr_blobs + ++ usr_blobs_len); + + if (load_image_size(val, (uint8_t *)header, size) != size) { + error_setg(errp, "Failed to load SMBIOS file %s", val); +@@ -1439,34 +1463,15 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + smbios_type4_count++; + } + +- smbios_tables_len += size; +- if (size > smbios_table_max) { +- smbios_table_max = size; ++ if (!usr_blobs_sizes) { ++ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); + } +- smbios_table_cnt++; +- +- /* add a copy of the newly loaded blob to legacy smbios_entries */ +- /* NOTE: This code runs before smbios_set_defaults(), so we don't +- * yet know which mode (legacy vs. aggregate-table) will be +- * required. We therefore add the binary blob to both legacy +- * (smbios_entries) and aggregate (smbios_tables) tables, and +- * delete the one we don't need from smbios_set_defaults(), +- * once we know which machine version has been requested. 
+- */ +- if (!smbios_entries) { +- smbios_entries_len = sizeof(uint16_t); +- smbios_entries = g_malloc0(smbios_entries_len); ++ g_array_append_val(usr_blobs_sizes, size); ++ usr_blobs_len += size; ++ if (size > usr_table_max) { ++ usr_table_max = size; + } +- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + +- size + sizeof(*table)); +- table = (struct smbios_table *)(smbios_entries + smbios_entries_len); +- table->header.type = SMBIOS_TABLE_ENTRY; +- table->header.length = cpu_to_le16(sizeof(*table) + size); +- memcpy(table->data, header, size); +- smbios_entries_len += sizeof(*table) + size; +- (*(uint16_t *)smbios_entries) = +- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); +- /* end: add a copy of the newly loaded blob to legacy smbios_entries */ ++ usr_table_cnt++; + + return; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch b/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch new file mode 100644 index 0000000..1dc4d22 --- /dev/null +++ b/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch @@ -0,0 +1,517 @@ +From 7ebb314a4f81d6d1a7dd4980b757fb5e556f5837 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 13 Feb 2024 16:45:18 +0100 +Subject: [PATCH 13/20] smbios: build legacy mode code only for 'pc' machine + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [11/18] 06e639be03e0d151fb9bcf5f728388edcb84219a + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + basically moving code around without functional change. + And exposing some symbols so that they could be shared + between smbios.c and new smbios_legacy.c + + plus some meson magic to build smbios_legacy.c only + for 'pc' machine and otherwise replace it with stub + if not selected.
+ + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + +Conflicts: hw/smbios/smbios.c + context change due to downstream smbios_type2_required + +Signed-off-by: Igor Mammedov +--- + hw/i386/Kconfig | 1 + + hw/smbios/Kconfig | 2 + + hw/smbios/meson.build | 5 + + hw/smbios/smbios.c | 163 +----------------------------- + hw/smbios/smbios_legacy.c | 179 +++++++++++++++++++++++++++++++++ + hw/smbios/smbios_legacy_stub.c | 15 +++ + include/hw/firmware/smbios.h | 5 + + 7 files changed, 208 insertions(+), 162 deletions(-) + create mode 100644 hw/smbios/smbios_legacy.c + create mode 100644 hw/smbios/smbios_legacy_stub.c + +diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig +index a1846be6f7..a6ee052f9a 100644 +--- a/hw/i386/Kconfig ++++ b/hw/i386/Kconfig +@@ -76,6 +76,7 @@ config I440FX + select PIIX + select DIMM + select SMBIOS ++ select SMBIOS_LEGACY + select FW_CFG_DMA + + config ISAPC +diff --git a/hw/smbios/Kconfig b/hw/smbios/Kconfig +index 553adf4bfc..8d989a2f1b 100644 +--- a/hw/smbios/Kconfig ++++ b/hw/smbios/Kconfig +@@ -1,2 +1,4 @@ + config SMBIOS + bool ++config SMBIOS_LEGACY ++ bool +diff --git a/hw/smbios/meson.build b/hw/smbios/meson.build +index 6eeae4b35c..fcac1d7490 100644 +--- a/hw/smbios/meson.build ++++ b/hw/smbios/meson.build +@@ -4,10 +4,15 @@ smbios_ss.add(when: 'CONFIG_IPMI', + if_true: files('smbios_type_38.c'), + if_false: files('smbios_type_38-stub.c')) + ++smbios_ss.add(when: 'CONFIG_SMBIOS_LEGACY', ++ if_true: files('smbios_legacy.c'), ++ if_false: files('smbios_legacy_stub.c')) ++ + system_ss.add_all(when: 'CONFIG_SMBIOS', if_true: smbios_ss) + system_ss.add(when: 'CONFIG_SMBIOS', if_false: files('smbios-stub.c')) + + system_ss.add(when: 'CONFIG_ALL', if_true: files( + 'smbios-stub.c', + 'smbios_type_38-stub.c', ++ 'smbios_legacy_stub.c', + )) +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index eb9927335d..e40204550e 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -31,31 +31,7 @@ + #include "hw/pci/pci_device.h" + 
#include "smbios_build.h" + +-/* legacy structures and constants for <= 2.0 machines */ +-struct smbios_header { +- uint16_t length; +- uint8_t type; +-} QEMU_PACKED; +- +-struct smbios_field { +- struct smbios_header header; +- uint8_t type; +- uint16_t offset; +- uint8_t data[]; +-} QEMU_PACKED; +- +-struct smbios_table { +- struct smbios_header header; +- uint8_t data[]; +-} QEMU_PACKED; +- +-#define SMBIOS_FIELD_ENTRY 0 +-#define SMBIOS_TABLE_ENTRY 1 +- +-static uint8_t *smbios_entries; +-static size_t smbios_entries_len; + static bool smbios_uuid_encoded = true; +-/* end: legacy structures & constants for <= 2.0 machines */ + + /* Set to true for modern Windows 10 HardwareID-6 compat */ + static bool smbios_type2_required; +@@ -65,7 +41,6 @@ static bool smbios_type2_required; + */ + uint8_t *usr_blobs; + size_t usr_blobs_len; +-static GArray *usr_blobs_sizes; + static unsigned usr_table_max; + static unsigned usr_table_cnt; + +@@ -531,7 +506,7 @@ static void smbios_check_type4_count(uint32_t expected_t4_count) + } + } + +-static void smbios_validate_table(void) ++void smbios_validate_table(void) + { + if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && + smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { +@@ -541,134 +516,6 @@ static void smbios_validate_table(void) + } + } + +- +-/* legacy setup functions for <= 2.0 machines */ +-static void smbios_add_field(int type, int offset, const void *data, size_t len) +-{ +- struct smbios_field *field; +- +- if (!smbios_entries) { +- smbios_entries_len = sizeof(uint16_t); +- smbios_entries = g_malloc0(smbios_entries_len); +- } +- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + +- sizeof(*field) + len); +- field = (struct smbios_field *)(smbios_entries + smbios_entries_len); +- field->header.type = SMBIOS_FIELD_ENTRY; +- field->header.length = cpu_to_le16(sizeof(*field) + len); +- +- field->type = type; +- field->offset = cpu_to_le16(offset); +- memcpy(field->data, data, len); +- +- smbios_entries_len += 
sizeof(*field) + len; +- (*(uint16_t *)smbios_entries) = +- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); +-} +- +-static void smbios_maybe_add_str(int type, int offset, const char *data) +-{ +- if (data) { +- smbios_add_field(type, offset, data, strlen(data) + 1); +- } +-} +- +-static void smbios_build_type_0_fields(void) +-{ +- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), +- smbios_type0.vendor); +- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), +- smbios_type0.version); +- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, +- bios_release_date_str), +- smbios_type0.date); +- if (smbios_type0.have_major_minor) { +- smbios_add_field(0, offsetof(struct smbios_type_0, +- system_bios_major_release), +- &smbios_type0.major, 1); +- smbios_add_field(0, offsetof(struct smbios_type_0, +- system_bios_minor_release), +- &smbios_type0.minor, 1); +- } +-} +- +-static void smbios_build_type_1_fields(void) +-{ +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), +- smbios_type1.manufacturer); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), +- smbios_type1.product); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), +- smbios_type1.version); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), +- smbios_type1.serial); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), +- smbios_type1.sku); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), +- smbios_type1.family); +- if (qemu_uuid_set) { +- /* We don't encode the UUID in the "wire format" here because this +- * function is for legacy mode and needs to keep the guest ABI, and +- * because we don't know what's the SMBIOS version advertised by the +- * BIOS. 
+- */ +- smbios_add_field(1, offsetof(struct smbios_type_1, uuid), +- &qemu_uuid, 16); +- } +-} +- +-uint8_t *smbios_get_table_legacy(size_t *length) +-{ +- int i; +- size_t usr_offset; +- +- /* also complain if fields were given for types > 1 */ +- if (find_next_bit(smbios_have_fields_bitmap, +- SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { +- error_report("can't process fields for smbios " +- "types > 1 on machine versions < 2.1!"); +- exit(1); +- } +- +- if (test_bit(4, smbios_have_binfile_bitmap)) { +- error_report("can't process table for smbios " +- "type 4 on machine versions < 2.1!"); +- exit(1); +- } +- +- g_free(smbios_entries); +- smbios_entries_len = sizeof(uint16_t); +- smbios_entries = g_malloc0(smbios_entries_len); +- +- for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; +- i++) +- { +- struct smbios_table *table; +- struct smbios_structure_header *header; +- size_t size = g_array_index(usr_blobs_sizes, size_t, i); +- +- header = (struct smbios_structure_header *)(usr_blobs + usr_offset); +- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + +- size + sizeof(*table)); +- table = (struct smbios_table *)(smbios_entries + smbios_entries_len); +- table->header.type = SMBIOS_TABLE_ENTRY; +- table->header.length = cpu_to_le16(sizeof(*table) + size); +- memcpy(table->data, header, size); +- smbios_entries_len += sizeof(*table) + size; +- (*(uint16_t *)smbios_entries) = +- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); +- usr_offset += size; +- } +- +- smbios_build_type_0_fields(); +- smbios_build_type_1_fields(); +- smbios_validate_table(); +- *length = smbios_entries_len; +- return smbios_entries; +-} +-/* end: legacy setup functions for <= 2.0 machines */ +- +- + bool smbios_skip_table(uint8_t type, bool required_table) + { + if (test_bit(type, smbios_have_binfile_bitmap)) { +@@ -1418,14 +1265,6 @@ static bool save_opt_list(size_t *ndest, char ***dest, QemuOpts *opts, + return true; + } + +-static void 
smbios_add_usr_blob_size(size_t size) +-{ +- if (!usr_blobs_sizes) { +- usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); +- } +- g_array_append_val(usr_blobs_sizes, size); +-} +- + void smbios_entry_add(QemuOpts *opts, Error **errp) + { + const char *val; +diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c +new file mode 100644 +index 0000000000..21f143e738 +--- /dev/null ++++ b/hw/smbios/smbios_legacy.c +@@ -0,0 +1,179 @@ ++/* ++ * SMBIOS legacy support ++ * ++ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. ++ * Copyright (C) 2013 Red Hat, Inc. ++ * ++ * Authors: ++ * Alex Williamson ++ * Markus Armbruster ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. ++ * ++ * Contributions after 2012-01-13 are licensed under the terms of the ++ * GNU GPL, version 2 or (at your option) any later version. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/bswap.h" ++#include "hw/firmware/smbios.h" ++#include "sysemu/sysemu.h" ++#include "qemu/error-report.h" ++ ++struct smbios_header { ++ uint16_t length; ++ uint8_t type; ++} QEMU_PACKED; ++ ++struct smbios_field { ++ struct smbios_header header; ++ uint8_t type; ++ uint16_t offset; ++ uint8_t data[]; ++} QEMU_PACKED; ++ ++struct smbios_table { ++ struct smbios_header header; ++ uint8_t data[]; ++} QEMU_PACKED; ++ ++#define SMBIOS_FIELD_ENTRY 0 ++#define SMBIOS_TABLE_ENTRY 1 ++ ++static uint8_t *smbios_entries; ++static size_t smbios_entries_len; ++GArray *usr_blobs_sizes; ++ ++void smbios_add_usr_blob_size(size_t size) ++{ ++ if (!usr_blobs_sizes) { ++ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); ++ } ++ g_array_append_val(usr_blobs_sizes, size); ++} ++ ++static void smbios_add_field(int type, int offset, const void *data, size_t len) ++{ ++ struct smbios_field *field; ++ ++ if (!smbios_entries) { ++ smbios_entries_len = sizeof(uint16_t); ++ smbios_entries = g_malloc0(smbios_entries_len); 
++ } ++ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + ++ sizeof(*field) + len); ++ field = (struct smbios_field *)(smbios_entries + smbios_entries_len); ++ field->header.type = SMBIOS_FIELD_ENTRY; ++ field->header.length = cpu_to_le16(sizeof(*field) + len); ++ ++ field->type = type; ++ field->offset = cpu_to_le16(offset); ++ memcpy(field->data, data, len); ++ ++ smbios_entries_len += sizeof(*field) + len; ++ (*(uint16_t *)smbios_entries) = ++ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); ++} ++ ++static void smbios_maybe_add_str(int type, int offset, const char *data) ++{ ++ if (data) { ++ smbios_add_field(type, offset, data, strlen(data) + 1); ++ } ++} ++ ++static void smbios_build_type_0_fields(void) ++{ ++ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), ++ smbios_type0.vendor); ++ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), ++ smbios_type0.version); ++ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, ++ bios_release_date_str), ++ smbios_type0.date); ++ if (smbios_type0.have_major_minor) { ++ smbios_add_field(0, offsetof(struct smbios_type_0, ++ system_bios_major_release), ++ &smbios_type0.major, 1); ++ smbios_add_field(0, offsetof(struct smbios_type_0, ++ system_bios_minor_release), ++ &smbios_type0.minor, 1); ++ } ++} ++ ++static void smbios_build_type_1_fields(void) ++{ ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), ++ smbios_type1.manufacturer); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), ++ smbios_type1.product); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), ++ smbios_type1.version); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), ++ smbios_type1.serial); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), ++ smbios_type1.sku); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), ++ smbios_type1.family); ++ if 
(qemu_uuid_set) { ++ /* ++ * We don't encode the UUID in the "wire format" here because this ++ * function is for legacy mode and needs to keep the guest ABI, and ++ * because we don't know what's the SMBIOS version advertised by the ++ * BIOS. ++ */ ++ smbios_add_field(1, offsetof(struct smbios_type_1, uuid), ++ &qemu_uuid, 16); ++ } ++} ++ ++uint8_t *smbios_get_table_legacy(size_t *length) ++{ ++ int i; ++ size_t usr_offset; ++ ++ /* complain if fields were given for types > 1 */ ++ if (find_next_bit(smbios_have_fields_bitmap, ++ SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { ++ error_report("can't process fields for smbios " ++ "types > 1 on machine versions < 2.1!"); ++ exit(1); ++ } ++ ++ if (test_bit(4, smbios_have_binfile_bitmap)) { ++ error_report("can't process table for smbios " ++ "type 4 on machine versions < 2.1!"); ++ exit(1); ++ } ++ ++ g_free(smbios_entries); ++ smbios_entries_len = sizeof(uint16_t); ++ smbios_entries = g_malloc0(smbios_entries_len); ++ ++ for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; ++ i++) ++ { ++ struct smbios_table *table; ++ struct smbios_structure_header *header; ++ size_t size = g_array_index(usr_blobs_sizes, size_t, i); ++ ++ header = (struct smbios_structure_header *)(usr_blobs + usr_offset); ++ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + ++ size + sizeof(*table)); ++ table = (struct smbios_table *)(smbios_entries + smbios_entries_len); ++ table->header.type = SMBIOS_TABLE_ENTRY; ++ table->header.length = cpu_to_le16(sizeof(*table) + size); ++ memcpy(table->data, header, size); ++ smbios_entries_len += sizeof(*table) + size; ++ (*(uint16_t *)smbios_entries) = ++ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); ++ usr_offset += size; ++ } ++ ++ smbios_build_type_0_fields(); ++ smbios_build_type_1_fields(); ++ smbios_validate_table(); ++ *length = smbios_entries_len; ++ return smbios_entries; ++} +diff --git a/hw/smbios/smbios_legacy_stub.c 
b/hw/smbios/smbios_legacy_stub.c +new file mode 100644 +index 0000000000..f29b15316c +--- /dev/null ++++ b/hw/smbios/smbios_legacy_stub.c +@@ -0,0 +1,15 @@ ++/* ++ * IPMI SMBIOS firmware handling ++ * ++ * Copyright (c) 2024 Igor Mammedov, Red Hat, Inc. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/firmware/smbios.h" ++ ++void smbios_add_usr_blob_size(size_t size) ++{ ++} +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 333de0d5fc..92e9aba415 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -17,6 +17,9 @@ + * + */ + ++extern uint8_t *usr_blobs; ++extern GArray *usr_blobs_sizes; ++ + typedef struct { + const char *vendor, *version, *date; + bool have_major_minor, uefi; +@@ -323,6 +326,8 @@ struct smbios_type_127 { + struct smbios_structure_header header; + } QEMU_PACKED; + ++void smbios_validate_table(void); ++void smbios_add_usr_blob_size(size_t size); + void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch b/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch new file mode 100644 index 0000000..48a9e16 --- /dev/null +++ b/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch @@ -0,0 +1,65 @@ +From 07f6ef2d032cda3e746ac2477c0a9bc1ac636f45 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 29 Dec 2023 14:08:13 +0100 +Subject: [PATCH 06/20] smbios: cleanup smbios_get_tables() from legacy + handling + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha 
+RH-Commit: [4/18] a5e09ce1df72293fecad863edd146a8c4b1a734f + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +smbios_get_tables() bails out right away if legacy mode is enabled +and won't generate any SMBIOS tables. At the same time x86 specific +fw_cfg_build_smbios() will generate legacy tables and then proceed +to preparing temporary mem_array for useless call to +smbios_get_tables() and then discard it. + +Drop legacy related check in smbios_get_tables() and return from +fw_cfg_build_smbios() early if legacy tables were built without +proceeding to non legacy part of the function. + +Signed-off-by: Igor Mammedov +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +--- + hw/i386/fw_cfg.c | 1 + + hw/smbios/smbios.c | 6 ------ + 2 files changed, 1 insertion(+), 6 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 6a5466faf0..ed72b1442d 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -76,6 +76,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + if (smbios_tables) { + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); ++ return; + } + + /* build the array of physical mem area from e820 table */ +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 074705fa4c..b13e40bae2 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1244,12 +1244,6 @@ void smbios_get_tables(MachineState *ms, + { + unsigned i, dimm_cnt, offset; + +- if (smbios_legacy) { +- *tables = *anchor = NULL; +- *tables_len = *anchor_len = 0; +- return; +- } +- + if (!smbios_immutable) { + smbios_build_type_0_table(); + smbios_build_type_1_table(); +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch b/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch new file mode 100644 index 0000000..38a7f95 --- /dev/null +++ b/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch @@ -0,0 +1,38 @@ +From
c0282a842a912aacac28a6ae229de5854c3fb5df Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 26 Feb 2024 13:20:22 +0100 +Subject: [PATCH 16/20] smbios: clear smbios_type4_count before building tables + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [14/18] f809595d2934ae975c0b7d17a4a79645e062ba42 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +it will help to keep type 4 tables accounting correct in case +SMBIOS tables are built multiple times. + +Signed-off-by: Igor Mammedov +Tested-by: Fiona Ebner +--- + hw/smbios/smbios.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 7e32430b85..4521ea386c 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1111,6 +1111,7 @@ void smbios_get_tables(MachineState *ms, + ep_type == SMBIOS_ENTRY_POINT_TYPE_64); + + g_free(smbios_tables); ++ smbios_type4_count = 0; + smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); + smbios_tables_len = usr_blobs_len; + smbios_table_max = usr_table_max; +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch b/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch new file mode 100644 index 0000000..a2c9532 --- /dev/null +++ b/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch @@ -0,0 +1,133 @@ +From 2b76d95ec07aba6d96070ee90c5015c1676be091 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 13 Feb 2024 16:25:54 +0100 +Subject: [PATCH 10/20] smbios: don't check type4 structures in legacy mode + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [8/18] c1f8409ea0d916f333c9373535bf21b521c62855 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + 
legacy mode doesn't support structures of type 2 and more, + and CLI has a check for '-smbios type' option, however it's + still possible to sneak in type4 as a blob with '-smbios file' + option. However doing the latter makes SMBIOS tables broken + since SeaBIOS doesn't expect that. + + Rather than trying to add support for type4 to legacy code + (both QEMU and SeaBIOS), simplify smbios_get_table_legacy() + by dropping not relevant check in legacy code and error out + on type4 blob. + + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + Tested-by: Fiona Ebner + +Conflicts: include/hw/firmware/smbios.h +Signed-off-by: Igor Mammedov + + Please enter the commit message for your changes. Lines starting +--- + hw/i386/fw_cfg.c | 3 +-- + hw/smbios/smbios.c | 18 ++++++++++++++---- + include/hw/firmware/smbios.h | 2 +- + 3 files changed, 16 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index bb7149c4c3..a25793a68f 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -73,8 +73,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + + if (pcmc->smbios_legacy_mode) { +- smbios_tables = smbios_get_table_legacy(ms->smp.cpus, +- &smbios_tables_len); ++ smbios_tables = smbios_get_table_legacy(&smbios_tables_len); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); + return; +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index d8d68716d4..441517cf24 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -530,14 +530,17 @@ opts_init(smbios_register_config); + */ + #define SMBIOS_21_MAX_TABLES_LEN 0xffff + +-static void smbios_validate_table(uint32_t expected_t4_count) ++static void smbios_check_type4_count(uint32_t expected_t4_count) + { + if (smbios_type4_count && smbios_type4_count != expected_t4_count) { + error_report("Expected %d SMBIOS Type 4 tables, got %d instead", + expected_t4_count,
smbios_type4_count); + exit(1); + } ++} + ++static void smbios_validate_table(void) ++{ + if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && + smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { + error_report("SMBIOS 2.1 table length %zu exceeds %d", +@@ -622,7 +625,7 @@ static void smbios_build_type_1_fields(void) + } + } + +-uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) ++uint8_t *smbios_get_table_legacy(size_t *length) + { + int i; + size_t usr_offset; +@@ -635,6 +638,12 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + exit(1); + } + ++ if (test_bit(4, have_binfile_bitmap)) { ++ error_report("can't process table for smbios " ++ "type 4 on machine versions < 2.1!"); ++ exit(1); ++ } ++ + g_free(smbios_entries); + smbios_entries_len = sizeof(uint16_t); + smbios_entries = g_malloc0(smbios_entries_len); +@@ -661,7 +670,7 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); +- smbios_validate_table(expected_t4_count); ++ smbios_validate_table(); + *length = smbios_entries_len; + return smbios_entries; + } +@@ -1319,7 +1328,8 @@ void smbios_get_tables(MachineState *ms, + smbios_build_type_41_table(errp); + smbios_build_type_127_table(); + +- smbios_validate_table(ms->smp.sockets); ++ smbios_check_type4_count(ms->smp.sockets); ++ smbios_validate_table(); + smbios_entry_point_setup(); + + /* return tables blob and entry point (anchor), and their sizes */ +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index b9fc9a0f42..d55018e5e3 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -315,7 +315,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SmbiosEntryPointType ep_type, + const char *stream_product, + const char *stream_version); +-uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); ++uint8_t 
*smbios_get_table_legacy(size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch b/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch new file mode 100644 index 0000000..2afbe66 --- /dev/null +++ b/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch @@ -0,0 +1,72 @@ +From bbb2d260e6f33380b9df28c74421055bd8dccda5 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 26 Feb 2024 12:59:33 +0100 +Subject: [PATCH 19/20] smbios: error out when building type 4 table is not + possible + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [17/18] 86b1c67bfbe9c0c14a190cd1204b6ccd1de1630f + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +If SMBIOS v2 version is requested but number of cores/threads +are more than it's possible to describe with v2, error out +instead of silently ignoring the fact and filling core/thread +count with bogus values. 
+ +This will help caller to decide if it should fallback to +SMBIOSv3 when smbios-entry-point-type='auto' + +Signed-off-by: Igor Mammedov +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +--- + hw/smbios/smbios.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 3d9dcb0d31..637aa952f5 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -655,7 +655,8 @@ static void smbios_build_type_3_table(void) + } + + static void smbios_build_type_4_table(MachineState *ms, unsigned instance, +- SmbiosEntryPointType ep_type) ++ SmbiosEntryPointType ep_type, ++ Error **errp) + { + char sock_str[128]; + size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; +@@ -709,6 +710,12 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance, + if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { + t->core_count2 = t->core_enabled2 = cpu_to_le16(cores_per_socket); + t->thread_count2 = cpu_to_le16(threads_per_socket); ++ } else if (t->core_count == 0xFF || t->thread_count == 0xFF) { ++ error_setg(errp, "SMBIOS 2.0 doesn't support number of processor " ++ "cores/threads more than 255, use " ++ "-machine smbios-entry-point-type=64 option to enable " ++ "SMBIOS 3.0 support"); ++ return; + } + + SMBIOS_BUILD_TABLE_POST; +@@ -1126,7 +1133,10 @@ static bool smbios_get_tables_ep(MachineState *ms, + assert(ms->smp.sockets >= 1); + + for (i = 0; i < ms->smp.sockets; i++) { +- smbios_build_type_4_table(ms, i, ep_type); ++ smbios_build_type_4_table(ms, i, ep_type, errp); ++ if (*errp) { ++ goto err_exit; ++ } + } + + smbios_build_type_8_table(); +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch b/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch new file mode 100644 index 0000000..db012bd --- /dev/null +++ b/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch @@ -0,0 +1,48 @@ +From c083959c963fde33f4769fd4c6e122dd16ce6d3c Mon Sep 17 
00:00:00 2001 +From: Igor Mammedov +Date: Wed, 21 Feb 2024 16:04:36 +0100 +Subject: [PATCH 17/20] smbios: extend smbios-entry-point-type with 'auto' + value + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [15/18] 202563dcf77e062a238aa2a10ec14c25d3f5a7d0 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +later patches will use it to pick SMBIOS version at runtime +depending on configuration. + +Signed-off-by: Igor Mammedov +Acked-by: Markus Armbruster +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +--- + qapi/machine.json | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/qapi/machine.json b/qapi/machine.json +index b6d634b30d..99f6368fa6 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1788,10 +1788,13 @@ + # + # @64: SMBIOS version 3.0 (64-bit) Entry Point + # ++# @auto: Either 2.x or 3.x SMBIOS version, 2.x if configuration can be ++# described by it and 3.x otherwise (since: 9.0) ++# + # Since: 7.0 + ## + { 'enum': 'SmbiosEntryPointType', +- 'data': [ '32', '64' ] } ++ 'data': [ '32', '64', 'auto' ] } + + ## + # @MemorySizeConfiguration: +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch b/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch new file mode 100644 index 0000000..1896f2b --- /dev/null +++ b/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch @@ -0,0 +1,281 @@ +From be0abbf3f7845847b46486704c46c5de5a2b2323 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 26 Feb 2024 13:49:14 +0100 +Subject: [PATCH 15/20] smbios: get rid of global smbios_ep_type + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [13/18] 2e838ed0d03989e2e4ee08041b5ba64d5d7f5820 + +JIRA: 
https://issues.redhat.com/browse/RHEL-21705 + +Conflicts: hw/arm/virt.c, hw/i386/fw_cfg.c, hw/riscv/virt.c, hw/smbios/smbios.c, + include/hw/firmware/smbios.h + due to downstream specific smbios_set_defaults() + +Signed-off-by: Igor Mammedov +Acked-by: Daniel Henrique Barboza +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +Signed-off-by: Igor Mammedov +--- + hw/arm/virt.c | 4 ++-- + hw/i386/fw_cfg.c | 6 +++--- + hw/i386/fw_cfg.h | 3 ++- + hw/i386/pc.c | 2 +- + hw/loongarch/virt.c | 7 ++++--- + hw/smbios/smbios.c | 26 ++++++++++++++------------ + hw/smbios/smbios_legacy.c | 2 +- + include/hw/firmware/smbios.h | 4 ++-- + 8 files changed, 29 insertions(+), 25 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e5cfc19c08..e4a66affcb 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1695,14 +1695,14 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? "1.0" : mc->name, +- true, SMBIOS_ENTRY_POINT_TYPE_64, ++ true, + NULL, NULL); + + /* build the array of physical mem area from base_memmap */ + mem_array.address = vms->memmap[VIRT_MEM].base; + mem_array.length = ms->ram_size; + +- smbios_get_tables(ms, &mem_array, 1, ++ smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64, &mem_array, 1, + &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len, + &error_fatal); +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index bdc3cc4556..58429bb78d 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -48,7 +48,8 @@ const char *fw_cfg_arch_key_name(uint16_t key) + return NULL; + } + +-void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) ++void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, ++ SmbiosEntryPointType ep_type) + { + #ifdef CONFIG_SMBIOS + uint8_t *smbios_tables, *smbios_anchor; +@@ -64,7 +65,6 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + /* These values are guest ABI, do not change */ + 
smbios_set_defaults("QEMU", mc->desc, mc->name, + pcmc->smbios_uuid_encoded, +- pcms->smbios_entry_point_type, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version); + } +@@ -91,7 +91,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + array_count++; + } + } +- smbios_get_tables(ms, mem_array, array_count, ++ smbios_get_tables(ms, ep_type, mem_array, array_count, + &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len, + &error_fatal); +diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h +index 1e1de6b4a3..92e310f5fd 100644 +--- a/hw/i386/fw_cfg.h ++++ b/hw/i386/fw_cfg.h +@@ -23,7 +23,8 @@ + FWCfgState *fw_cfg_arch_create(MachineState *ms, + uint16_t boot_cpus, + uint16_t apic_id_limit); +-void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg); ++void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, ++ SmbiosEntryPointType ep_type); + void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); + void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 16de2a59e8..ae6777fc1a 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -847,7 +847,7 @@ void pc_machine_done(Notifier *notifier, void *data) + + acpi_setup(); + if (x86ms->fw_cfg) { +- fw_cfg_build_smbios(pcms, x86ms->fw_cfg); ++ fw_cfg_build_smbios(pcms, x86ms->fw_cfg, pcms->smbios_entry_point_type); + fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); + /* update FW_CFG_NB_CPUS to account for -device added CPUs */ + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 7358a023d3..77956b5ada 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -320,10 +320,11 @@ static void virt_build_smbios(LoongArchMachineState *lams) + return; + } + +- smbios_set_defaults("QEMU", product, mc->name, +- true, SMBIOS_ENTRY_POINT_TYPE_64); ++ smbios_set_defaults("QEMU", product, mc->name, true); + +- 
smbios_get_tables(ms, NULL, 0, &smbios_tables, &smbios_tables_len, ++ smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64, ++ NULL, 0, ++ &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len, &error_fatal); + + if (smbios_anchor) { +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index b5745c6c2d..7e32430b85 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -47,7 +47,6 @@ uint8_t *smbios_tables; + size_t smbios_tables_len; + unsigned smbios_table_max; + unsigned smbios_table_cnt; +-static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; + + static SmbiosEntryPoint ep; + +@@ -506,9 +505,9 @@ static bool smbios_check_type4_count(uint32_t expected_t4_count, Error **errp) + return true; + } + +-bool smbios_validate_table(Error **errp) ++bool smbios_validate_table(SmbiosEntryPointType ep_type, Error **errp) + { +- if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && ++ if (ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && + smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { + error_setg(errp, "SMBIOS 2.1 table length %zu exceeds %d", + smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); +@@ -655,14 +654,15 @@ static void smbios_build_type_3_table(void) + SMBIOS_BUILD_TABLE_POST; + } + +-static void smbios_build_type_4_table(MachineState *ms, unsigned instance) ++static void smbios_build_type_4_table(MachineState *ms, unsigned instance, ++ SmbiosEntryPointType ep_type) + { + char sock_str[128]; + size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; + unsigned threads_per_socket; + unsigned cores_per_socket; + +- if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { ++ if (ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { + tbl_len = SMBIOS_TYPE_4_LEN_V30; + } + +@@ -991,13 +991,11 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, + bool uuid_encoded, +- SmbiosEntryPointType ep_type, + const char *stream_product, + const char *stream_version) + { + 
smbios_have_defaults = true; + smbios_uuid_encoded = uuid_encoded; +- smbios_ep_type = ep_type; + + /* + * If @stream_product & @stream_version are non-NULL, then +@@ -1048,9 +1046,9 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SMBIOS_SET_DEFAULT(type17.manufacturer, manufacturer); + } + +-static void smbios_entry_point_setup(void) ++static void smbios_entry_point_setup(SmbiosEntryPointType ep_type) + { +- switch (smbios_ep_type) { ++ switch (ep_type) { + case SMBIOS_ENTRY_POINT_TYPE_32: + memcpy(ep.ep21.anchor_string, "_SM_", 4); + memcpy(ep.ep21.intermediate_anchor_string, "_DMI_", 5); +@@ -1100,6 +1098,7 @@ static void smbios_entry_point_setup(void) + } + + void smbios_get_tables(MachineState *ms, ++ SmbiosEntryPointType ep_type, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, + uint8_t **tables, size_t *tables_len, +@@ -1108,6 +1107,9 @@ void smbios_get_tables(MachineState *ms, + { + unsigned i, dimm_cnt, offset; + ++ assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 || ++ ep_type == SMBIOS_ENTRY_POINT_TYPE_64); ++ + g_free(smbios_tables); + smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); + smbios_tables_len = usr_blobs_len; +@@ -1122,7 +1124,7 @@ void smbios_get_tables(MachineState *ms, + assert(ms->smp.sockets >= 1); + + for (i = 0; i < ms->smp.sockets; i++) { +- smbios_build_type_4_table(ms, i); ++ smbios_build_type_4_table(ms, i, ep_type); + } + + smbios_build_type_8_table(); +@@ -1171,10 +1173,10 @@ void smbios_get_tables(MachineState *ms, + if (!smbios_check_type4_count(ms->smp.sockets, errp)) { + goto err_exit; + } +- if (!smbios_validate_table(errp)) { ++ if (!smbios_validate_table(ep_type, errp)) { + goto err_exit; + } +- smbios_entry_point_setup(); ++ smbios_entry_point_setup(ep_type); + + /* return tables blob and entry point (anchor), and their sizes */ + *tables = smbios_tables; +diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c +index a6544bf55a..06907cd16c 
100644 +--- a/hw/smbios/smbios_legacy.c ++++ b/hw/smbios/smbios_legacy.c +@@ -173,7 +173,7 @@ uint8_t *smbios_get_table_legacy(size_t *length, Error **errp) + + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); +- if (!smbios_validate_table(errp)) { ++ if (!smbios_validate_table(SMBIOS_ENTRY_POINT_TYPE_32, errp)) { + goto err_exit; + } + +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 44af3a0d82..781298f594 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -326,18 +326,18 @@ struct smbios_type_127 { + struct smbios_structure_header header; + } QEMU_PACKED; + +-bool smbios_validate_table(Error **errp); ++bool smbios_validate_table(SmbiosEntryPointType ep_type, Error **errp); + void smbios_add_usr_blob_size(size_t size); + void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, + bool uuid_encoded, +- SmbiosEntryPointType ep_type, + const char *stream_product, + const char *stream_version); + uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); + void smbios_get_tables(MachineState *ms, ++ SmbiosEntryPointType ep_type, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, + uint8_t **tables, size_t *tables_len, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch b/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch new file mode 100644 index 0000000..ecb730e --- /dev/null +++ b/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch @@ -0,0 +1,198 @@ +From 0802fa7199c8085d018fc38dd4beaa5062d383d1 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 29 Dec 2023 15:37:29 +0100 +Subject: [PATCH 08/20] smbios: get rid of smbios_legacy global + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with 
SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [6/18] 684dd1dca8d611c6de97b26ef8c1cda6ca509d54 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + clean up smbios_set_defaults() which is reused by legacy + and non legacy machines from being aware of 'legacy' notion + and need to turn it off. And push legacy handling up to + PC machine code where it's relevant. + + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + Acked-by: Daniel Henrique Barboza + Tested-by: Fiona Ebner + +Conflicts: hw/arm/virt.c, hw/i386/fw_cfg.c, hw/loongarch/virt.c, + hw/smbios/smbios.c, include/hw/firmware/smbios.h + due to downstream specific signature of smbios_set_defaults() +PS: + while fixing conflicts move RHEL specific + smbios_stream_product/smbios_stream_version + at the end of arguments list + +Signed-off-by: Igor Mammedov +--- + hw/arm/virt.c | 5 +++-- + hw/i386/fw_cfg.c | 11 +++++----- + hw/loongarch/virt.c | 2 +- + hw/smbios/smbios.c | 39 ++++++++++++++++-------------------- + include/hw/firmware/smbios.h | 6 +++--- + 5 files changed, 30 insertions(+), 33 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 943c563391..e5cfc19c08 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1694,8 +1694,9 @@ static void virt_build_smbios(VirtMachineState *vms) + } + + smbios_set_defaults("QEMU", product, +- vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, +- true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); ++ vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, ++ true, SMBIOS_ENTRY_POINT_TYPE_64, ++ NULL, NULL); + + /* build the array of physical mem area from base_memmap */ + mem_array.address = vms->memmap[VIRT_MEM].base; +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 79ff7f7225..bb7149c4c3 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -63,17 +63,18 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ + smbios_set_defaults("QEMU", mc->desc, mc->name, +- pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, ++ pcmc->smbios_uuid_encoded, ++ pcms->smbios_entry_point_type, + pcmc->smbios_stream_product, +- pcmc->smbios_stream_version, +- pcms->smbios_entry_point_type); ++ pcmc->smbios_stream_version); + } + + /* tell smbios about cpuid version and features */ + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + +- smbios_tables = smbios_get_table_legacy(ms->smp.cpus, &smbios_tables_len); +- if (smbios_tables) { ++ if (pcmc->smbios_legacy_mode) { ++ smbios_tables = smbios_get_table_legacy(ms->smp.cpus, ++ &smbios_tables_len); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); + return; +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 4b7dc67a2d..7358a023d3 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -320,7 +320,7 @@ static void virt_build_smbios(LoongArchMachineState *lams) + return; + } + +- smbios_set_defaults("QEMU", product, mc->name, false, ++ smbios_set_defaults("QEMU", product, mc->name, + true, SMBIOS_ENTRY_POINT_TYPE_64); + + smbios_get_tables(ms, NULL, 0, &smbios_tables, &smbios_tables_len, +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 8129d396d1..0c8c439859 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -54,7 +54,6 @@ struct smbios_table { + + static uint8_t *smbios_entries; + static size_t smbios_entries_len; +-static bool smbios_legacy = true; + static 
bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + +@@ -618,9 +617,16 @@ static void smbios_build_type_1_fields(void) + + uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + { +- if (!smbios_legacy) { +- *length = 0; +- return NULL; ++ /* drop unwanted version of command-line file blob(s) */ ++ g_free(smbios_tables); ++ smbios_tables = NULL; ++ ++ /* also complain if fields were given for types > 1 */ ++ if (find_next_bit(have_fields_bitmap, ++ SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { ++ error_report("can't process fields for smbios " ++ "types > 1 on machine versions < 2.1!"); ++ exit(1); + } + + if (!smbios_immutable) { +@@ -1107,31 +1113,16 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + } + + void smbios_set_defaults(const char *manufacturer, const char *product, +- const char *version, bool legacy_mode, ++ const char *version, + bool uuid_encoded, ++ SmbiosEntryPointType ep_type, + const char *stream_product, +- const char *stream_version, +- SmbiosEntryPointType ep_type) ++ const char *stream_version) + { + smbios_have_defaults = true; +- smbios_legacy = legacy_mode; + smbios_uuid_encoded = uuid_encoded; + smbios_ep_type = ep_type; + +- /* drop unwanted version of command-line file blob(s) */ +- if (smbios_legacy) { +- g_free(smbios_tables); +- /* in legacy mode, also complain if fields were given for types > 1 */ +- if (find_next_bit(have_fields_bitmap, +- SMBIOS_MAX_TYPE+1, 2) < SMBIOS_MAX_TYPE+1) { +- error_report("can't process fields for smbios " +- "types > 1 on machine versions < 2.1!"); +- exit(1); +- } +- } else { +- g_free(smbios_entries); +- } +- + /* + * If @stream_product & @stream_version are non-NULL, then + * we're following rules for new Windows driver support. 
+@@ -1241,6 +1232,10 @@ void smbios_get_tables(MachineState *ms, + { + unsigned i, dimm_cnt, offset; + ++ /* drop unwanted (legacy) version of command-line file blob(s) */ ++ g_free(smbios_entries); ++ smbios_entries = NULL; ++ + if (!smbios_immutable) { + smbios_build_type_0_table(); + smbios_build_type_1_table(); +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 95ec64ce2c..b9fc9a0f42 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -310,11 +310,11 @@ struct smbios_type_127 { + void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, +- const char *version, bool legacy_mode, ++ const char *version, + bool uuid_encoded, ++ SmbiosEntryPointType ep_type, + const char *stream_product, +- const char *stream_version, +- SmbiosEntryPointType ep_type); ++ const char *stream_version); + uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch b/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch new file mode 100644 index 0000000..fde17ac --- /dev/null +++ b/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch @@ -0,0 +1,134 @@ +From 1536f1ec00ddc1729854b381cc0d54814bb6c19f Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 29 Dec 2023 15:04:55 +0100 +Subject: [PATCH 07/20] smbios: get rid of smbios_smp_sockets global + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [5/18] f59bfeb547b7febcf1de2b6179af006e7fa0bccd + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + it makes smbios_validate_table() 
independent from + smbios_smp_sockets global, which in turn lets + smbios_get_tables() avoid using not related legacy code. + + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + Tested-by: Fiona Ebner + +Conflicts: + include/hw/firmware/smbios.h due to down-stream + (d9ff466c980d Machine type related general changes) + adding custom stream_product/stream_version + +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 2 +- + hw/smbios/smbios.c | 22 +++++++++------------- + include/hw/firmware/smbios.h | 2 +- + 3 files changed, 11 insertions(+), 15 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index ed72b1442d..79ff7f7225 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -72,7 +72,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + /* tell smbios about cpuid version and features */ + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + +- smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); ++ smbios_tables = smbios_get_table_legacy(ms->smp.cpus, &smbios_tables_len); + if (smbios_tables) { + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index b13e40bae2..8129d396d1 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -73,7 +73,7 @@ static SmbiosEntryPoint ep; + static int smbios_type4_count = 0; + static bool smbios_immutable; + static bool smbios_have_defaults; +-static uint32_t smbios_cpuid_version, smbios_cpuid_features, smbios_smp_sockets; ++static uint32_t smbios_cpuid_version, smbios_cpuid_features; + + static DECLARE_BITMAP(have_binfile_bitmap, SMBIOS_MAX_TYPE+1); + static DECLARE_BITMAP(have_fields_bitmap, SMBIOS_MAX_TYPE+1); +@@ -524,14 +524,11 @@ opts_init(smbios_register_config); + */ + #define SMBIOS_21_MAX_TABLES_LEN 0xffff + +-static void smbios_validate_table(MachineState *ms) ++static void smbios_validate_table(uint32_t expected_t4_count) + { +- uint32_t 
expect_t4_count = smbios_legacy ? +- ms->smp.cpus : smbios_smp_sockets; +- +- if (smbios_type4_count && smbios_type4_count != expect_t4_count) { ++ if (smbios_type4_count && smbios_type4_count != expected_t4_count) { + error_report("Expected %d SMBIOS Type 4 tables, got %d instead", +- expect_t4_count, smbios_type4_count); ++ expected_t4_count, smbios_type4_count); + exit(1); + } + +@@ -619,7 +616,7 @@ static void smbios_build_type_1_fields(void) + } + } + +-uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length) ++uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + { + if (!smbios_legacy) { + *length = 0; +@@ -629,7 +626,7 @@ uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length) + if (!smbios_immutable) { + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); +- smbios_validate_table(ms); ++ smbios_validate_table(expected_t4_count); + smbios_immutable = true; + } + *length = smbios_entries_len; +@@ -1250,10 +1247,9 @@ void smbios_get_tables(MachineState *ms, + smbios_build_type_2_table(); + smbios_build_type_3_table(); + +- smbios_smp_sockets = ms->smp.sockets; +- assert(smbios_smp_sockets >= 1); ++ assert(ms->smp.sockets >= 1); + +- for (i = 0; i < smbios_smp_sockets; i++) { ++ for (i = 0; i < ms->smp.sockets; i++) { + smbios_build_type_4_table(ms, i); + } + +@@ -1299,7 +1295,7 @@ void smbios_get_tables(MachineState *ms, + smbios_build_type_41_table(errp); + smbios_build_type_127_table(); + +- smbios_validate_table(ms); ++ smbios_validate_table(ms->smp.sockets); + smbios_entry_point_setup(); + smbios_immutable = true; + } +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index f8dd07fe4c..95ec64ce2c 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -315,7 +315,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + const char *stream_product, + const char *stream_version, + SmbiosEntryPointType ep_type); +-uint8_t 
*smbios_get_table_legacy(MachineState *ms, size_t *length); ++uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-handle-errors-consistently.patch b/SOURCES/kvm-smbios-handle-errors-consistently.patch new file mode 100644 index 0000000..ac75c76 --- /dev/null +++ b/SOURCES/kvm-smbios-handle-errors-consistently.patch @@ -0,0 +1,217 @@ +From 7271c4424c6d90f0bb34f8090eb4e192eb2b2537 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 20 Feb 2024 10:30:28 +0100 +Subject: [PATCH 14/20] smbios: handle errors consistently + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [12/18] d8a5a70602ae0665ce35e5bf87b2d8420f9189bc + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + Current code uses mix of error_report()+exit(1) + and error_setg() to handle errors. + Use newer error_setg() everywhere, beside consistency + it will allow to detect error condition without killing + QEMU and attempt switch-over to SMBIOS3.x tables/entrypoint + in follow up patch. + + while at it, clear smbios_tables pointer after freeing. + that will avoid double free if smbios_get_tables() is called + multiple times. 
+ + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + +Conflicts: include/hw/firmware/smbios.h + due to downstream specific smbios_set_defaults() + +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 3 ++- + hw/smbios/smbios.c | 34 ++++++++++++++++++++++------------ + hw/smbios/smbios_legacy.c | 22 ++++++++++++++-------- + include/hw/firmware/smbios.h | 4 ++-- + 4 files changed, 40 insertions(+), 23 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index a25793a68f..bdc3cc4556 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -73,7 +73,8 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + + if (pcmc->smbios_legacy_mode) { +- smbios_tables = smbios_get_table_legacy(&smbios_tables_len); ++ smbios_tables = smbios_get_table_legacy(&smbios_tables_len, ++ &error_fatal); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); + return; +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index e40204550e..b5745c6c2d 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -19,7 +19,6 @@ + #include "qemu/units.h" + #include "qapi/error.h" + #include "qemu/config-file.h" +-#include "qemu/error-report.h" + #include "qemu/module.h" + #include "qemu/option.h" + #include "sysemu/sysemu.h" +@@ -497,23 +496,25 @@ opts_init(smbios_register_config); + */ + #define SMBIOS_21_MAX_TABLES_LEN 0xffff + +-static void smbios_check_type4_count(uint32_t expected_t4_count) ++static bool smbios_check_type4_count(uint32_t expected_t4_count, Error **errp) + { + if (smbios_type4_count && smbios_type4_count != expected_t4_count) { +- error_report("Expected %d SMBIOS Type 4 tables, got %d instead", +- expected_t4_count, smbios_type4_count); +- exit(1); ++ error_setg(errp, "Expected %d SMBIOS Type 4 tables, got %d instead", ++ expected_t4_count, smbios_type4_count); ++ return false; + } ++ return true; + } + +-void 
smbios_validate_table(void) ++bool smbios_validate_table(Error **errp) + { + if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && + smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { +- error_report("SMBIOS 2.1 table length %zu exceeds %d", +- smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); +- exit(1); ++ error_setg(errp, "SMBIOS 2.1 table length %zu exceeds %d", ++ smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); ++ return false; + } ++ return true; + } + + bool smbios_skip_table(uint8_t type, bool required_table) +@@ -1167,15 +1168,18 @@ void smbios_get_tables(MachineState *ms, + smbios_build_type_41_table(errp); + smbios_build_type_127_table(); + +- smbios_check_type4_count(ms->smp.sockets); +- smbios_validate_table(); ++ if (!smbios_check_type4_count(ms->smp.sockets, errp)) { ++ goto err_exit; ++ } ++ if (!smbios_validate_table(errp)) { ++ goto err_exit; ++ } + smbios_entry_point_setup(); + + /* return tables blob and entry point (anchor), and their sizes */ + *tables = smbios_tables; + *tables_len = smbios_tables_len; + *anchor = (uint8_t *)&ep; +- + /* calculate length based on anchor string */ + if (!strncmp((char *)&ep, "_SM_", 4)) { + *anchor_len = sizeof(struct smbios_21_entry_point); +@@ -1184,6 +1188,12 @@ void smbios_get_tables(MachineState *ms, + } else { + abort(); + } ++ ++ return; ++err_exit: ++ g_free(smbios_tables); ++ smbios_tables = NULL; ++ return; + } + + static void save_opt(const char **dest, QemuOpts *opts, const char *name) +diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c +index 21f143e738..a6544bf55a 100644 +--- a/hw/smbios/smbios_legacy.c ++++ b/hw/smbios/smbios_legacy.c +@@ -19,7 +19,7 @@ + #include "qemu/bswap.h" + #include "hw/firmware/smbios.h" + #include "sysemu/sysemu.h" +-#include "qemu/error-report.h" ++#include "qapi/error.h" + + struct smbios_header { + uint16_t length; +@@ -128,7 +128,7 @@ static void smbios_build_type_1_fields(void) + } + } + +-uint8_t *smbios_get_table_legacy(size_t *length) ++uint8_t 
*smbios_get_table_legacy(size_t *length, Error **errp) + { + int i; + size_t usr_offset; +@@ -136,15 +136,15 @@ uint8_t *smbios_get_table_legacy(size_t *length) + /* complain if fields were given for types > 1 */ + if (find_next_bit(smbios_have_fields_bitmap, + SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { +- error_report("can't process fields for smbios " ++ error_setg(errp, "can't process fields for smbios " + "types > 1 on machine versions < 2.1!"); +- exit(1); ++ goto err_exit; + } + + if (test_bit(4, smbios_have_binfile_bitmap)) { +- error_report("can't process table for smbios " +- "type 4 on machine versions < 2.1!"); +- exit(1); ++ error_setg(errp, "can't process table for smbios " ++ "type 4 on machine versions < 2.1!"); ++ goto err_exit; + } + + g_free(smbios_entries); +@@ -173,7 +173,13 @@ uint8_t *smbios_get_table_legacy(size_t *length) + + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); +- smbios_validate_table(); ++ if (!smbios_validate_table(errp)) { ++ goto err_exit; ++ } ++ + *length = smbios_entries_len; + return smbios_entries; ++err_exit: ++ g_free(smbios_entries); ++ return NULL; + } +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 92e9aba415..44af3a0d82 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -326,7 +326,7 @@ struct smbios_type_127 { + struct smbios_structure_header header; + } QEMU_PACKED; + +-void smbios_validate_table(void); ++bool smbios_validate_table(Error **errp); + void smbios_add_usr_blob_size(size_t size); + void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); +@@ -336,7 +336,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SmbiosEntryPointType ep_type, + const char *stream_product, + const char *stream_version); +-uint8_t *smbios_get_table_legacy(size_t *length); ++uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); + void 
smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch b/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch new file mode 100644 index 0000000..fd1bc19 --- /dev/null +++ b/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch @@ -0,0 +1,131 @@ +From 38220bc61bdb1614f34a53481f7604720c9e9e5a Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 20 Feb 2024 10:41:06 +0100 +Subject: [PATCH 18/20] smbios: in case of entry point is 'auto' try to build + v2 tables 1st + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [16/18] becbcb3d8dad4842e5939bb75e21f4e737a4a325 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +QEMU for some time now uses SMBIOS 3.0 for PC/Q35 machines by +default, however Windows has a bug in locating SMBIOS 3.0 +entrypoint and fails to find tables when booted on SeaBIOS +(on UEFI SMBIOS 3.0 tables work fine since firmware hands +over tables in another way) + +Missing SMBIOS tables may lead to some issues for guest +though (worst are: possible reactivation, inability to +get virtio drivers from 'Windows Update') + +It's unclear at this point if MS will fix the issue on their +side. So instead of it (or rather in addition) this patch +will try to workaround the issue. + +aka, use smbios-entry-point-type=auto to make QEMU try +generating conservative SMBIOS 2.0 tables and if that +fails (due to limits/requested configuration) fallback +to SMBIOS 3.0 tables. + +With this in place majority of users will use SMBIOS 2.0 +tables which work fine with (Windows + legacy BIOS). 
+The configurations that are not possible to describe +with SMBIOS 2.0 will switch automatically to SMBIOS 3.0 +(which will trigger Windows bug but there is nothing +QEMU can do here, so go and ask Microsoft for a real fix). + +Signed-off-by: Igor Mammedov +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +--- + hw/smbios/smbios.c | 52 ++++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 49 insertions(+), 3 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 4521ea386c..3d9dcb0d31 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1097,7 +1097,7 @@ static void smbios_entry_point_setup(SmbiosEntryPointType ep_type) + } + } + +-void smbios_get_tables(MachineState *ms, ++static bool smbios_get_tables_ep(MachineState *ms, + SmbiosEntryPointType ep_type, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, +@@ -1106,6 +1106,7 @@ void smbios_get_tables(MachineState *ms, + Error **errp) + { + unsigned i, dimm_cnt, offset; ++ ERRP_GUARD(); + + assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 || + ep_type == SMBIOS_ENTRY_POINT_TYPE_64); +@@ -1192,11 +1193,56 @@ void smbios_get_tables(MachineState *ms, + abort(); + } + +- return; ++ return true; + err_exit: + g_free(smbios_tables); + smbios_tables = NULL; +- return; ++ return false; ++} ++ ++void smbios_get_tables(MachineState *ms, ++ SmbiosEntryPointType ep_type, ++ const struct smbios_phys_mem_area *mem_array, ++ const unsigned int mem_array_size, ++ uint8_t **tables, size_t *tables_len, ++ uint8_t **anchor, size_t *anchor_len, ++ Error **errp) ++{ ++ Error *local_err = NULL; ++ bool is_valid; ++ ERRP_GUARD(); ++ ++ switch (ep_type) { ++ case SMBIOS_ENTRY_POINT_TYPE_AUTO: ++ case SMBIOS_ENTRY_POINT_TYPE_32: ++ is_valid = smbios_get_tables_ep(ms, SMBIOS_ENTRY_POINT_TYPE_32, ++ mem_array, mem_array_size, ++ tables, tables_len, ++ anchor, anchor_len, ++ &local_err); ++ if (is_valid || ep_type != SMBIOS_ENTRY_POINT_TYPE_AUTO) { ++ break; ++ } ++ /* ++ * 
fall through in case AUTO endpoint is selected and ++ * SMBIOS 2.x tables can't be generated, to try if SMBIOS 3.x ++ * tables would work ++ */ ++ case SMBIOS_ENTRY_POINT_TYPE_64: ++ error_free(local_err); ++ local_err = NULL; ++ is_valid = smbios_get_tables_ep(ms, SMBIOS_ENTRY_POINT_TYPE_64, ++ mem_array, mem_array_size, ++ tables, tables_len, ++ anchor, anchor_len, ++ &local_err); ++ break; ++ default: ++ abort(); ++ } ++ if (!is_valid) { ++ error_propagate(errp, local_err); ++ } + } + + static void save_opt(const char **dest, QemuOpts *opts, const char *name) +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch b/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch new file mode 100644 index 0000000..cba6134 --- /dev/null +++ b/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch @@ -0,0 +1,330 @@ +From 36b0256e27f9d5268c5413891b4a7322819ae9db Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 4 Mar 2024 15:56:19 +0100 +Subject: [PATCH 12/20] smbios: rename/expose structures/bitmaps used by both + legacy and modern code + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [10/18] 59fe438d5b7f6e584a6bb02597e4d4724fe2cece + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + As a preparation to move legacy handling into a separate file, + add prefix 'smbios_' to type0/type1/have_binfile_bitmap/have_fields_bitmap + and expose them in smbios.h so that they can be reused in + legacy and modern code. + + Doing it as a separate patch to avoid rename cluttering follow-up + patch which will move legacy code into a separate file. 
+ + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + +Conflicts: hw/smbios/smbios.c + due to setting downstream type1.family/type1.sku defaults + +Signed-off-by: Igor Mammedov +--- + hw/smbios/smbios.c | 117 ++++++++++++++++------------------- + include/hw/firmware/smbios.h | 16 +++++ + 2 files changed, 71 insertions(+), 62 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index c48a290478..eb9927335d 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -81,19 +81,11 @@ static int smbios_type4_count = 0; + static bool smbios_have_defaults; + static uint32_t smbios_cpuid_version, smbios_cpuid_features; + +-static DECLARE_BITMAP(have_binfile_bitmap, SMBIOS_MAX_TYPE+1); +-static DECLARE_BITMAP(have_fields_bitmap, SMBIOS_MAX_TYPE+1); ++DECLARE_BITMAP(smbios_have_binfile_bitmap, SMBIOS_MAX_TYPE + 1); ++DECLARE_BITMAP(smbios_have_fields_bitmap, SMBIOS_MAX_TYPE + 1); + +-static struct { +- const char *vendor, *version, *date; +- bool have_major_minor, uefi; +- uint8_t major, minor; +-} type0; +- +-static struct { +- const char *manufacturer, *product, *version, *serial, *sku, *family; +- /* uuid is in qemu_uuid */ +-} type1; ++smbios_type0_t smbios_type0; ++smbios_type1_t smbios_type1; + + static struct { + const char *manufacturer, *product, *version, *serial, *asset, *location; +@@ -584,36 +576,36 @@ static void smbios_maybe_add_str(int type, int offset, const char *data) + static void smbios_build_type_0_fields(void) + { + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), +- type0.vendor); ++ smbios_type0.vendor); + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), +- type0.version); ++ smbios_type0.version); + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, + bios_release_date_str), +- type0.date); +- if (type0.have_major_minor) { ++ smbios_type0.date); ++ if (smbios_type0.have_major_minor) { + smbios_add_field(0, offsetof(struct smbios_type_0, + system_bios_major_release), +- 
&type0.major, 1); ++ &smbios_type0.major, 1); + smbios_add_field(0, offsetof(struct smbios_type_0, + system_bios_minor_release), +- &type0.minor, 1); ++ &smbios_type0.minor, 1); + } + } + + static void smbios_build_type_1_fields(void) + { + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), +- type1.manufacturer); ++ smbios_type1.manufacturer); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), +- type1.product); ++ smbios_type1.product); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), +- type1.version); ++ smbios_type1.version); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), +- type1.serial); ++ smbios_type1.serial); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), +- type1.sku); ++ smbios_type1.sku); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), +- type1.family); ++ smbios_type1.family); + if (qemu_uuid_set) { + /* We don't encode the UUID in the "wire format" here because this + * function is for legacy mode and needs to keep the guest ABI, and +@@ -631,14 +623,14 @@ uint8_t *smbios_get_table_legacy(size_t *length) + size_t usr_offset; + + /* also complain if fields were given for types > 1 */ +- if (find_next_bit(have_fields_bitmap, ++ if (find_next_bit(smbios_have_fields_bitmap, + SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { + error_report("can't process fields for smbios " + "types > 1 on machine versions < 2.1!"); + exit(1); + } + +- if (test_bit(4, have_binfile_bitmap)) { ++ if (test_bit(4, smbios_have_binfile_bitmap)) { + error_report("can't process table for smbios " + "type 4 on machine versions < 2.1!"); + exit(1); +@@ -679,10 +671,10 @@ uint8_t *smbios_get_table_legacy(size_t *length) + + bool smbios_skip_table(uint8_t type, bool required_table) + { +- if (test_bit(type, have_binfile_bitmap)) { ++ if (test_bit(type, smbios_have_binfile_bitmap)) { + return true; /* user provided their own binary 
blob(s) */ + } +- if (test_bit(type, have_fields_bitmap)) { ++ if (test_bit(type, smbios_have_fields_bitmap)) { + return false; /* user provided fields via command line */ + } + if (smbios_have_defaults && required_table) { +@@ -710,25 +702,25 @@ static void smbios_build_type_0_table(void) + { + SMBIOS_BUILD_TABLE_PRE(0, T0_BASE, false); /* optional, leave up to BIOS */ + +- SMBIOS_TABLE_SET_STR(0, vendor_str, type0.vendor); +- SMBIOS_TABLE_SET_STR(0, bios_version_str, type0.version); ++ SMBIOS_TABLE_SET_STR(0, vendor_str, smbios_type0.vendor); ++ SMBIOS_TABLE_SET_STR(0, bios_version_str, smbios_type0.version); + + t->bios_starting_address_segment = cpu_to_le16(0xE800); /* from SeaBIOS */ + +- SMBIOS_TABLE_SET_STR(0, bios_release_date_str, type0.date); ++ SMBIOS_TABLE_SET_STR(0, bios_release_date_str, smbios_type0.date); + + t->bios_rom_size = 0; /* hardcoded in SeaBIOS with FIXME comment */ + + t->bios_characteristics = cpu_to_le64(0x08); /* Not supported */ + t->bios_characteristics_extension_bytes[0] = 0; + t->bios_characteristics_extension_bytes[1] = 0x14; /* TCD/SVVP | VM */ +- if (type0.uefi) { ++ if (smbios_type0.uefi) { + t->bios_characteristics_extension_bytes[1] |= 0x08; /* |= UEFI */ + } + +- if (type0.have_major_minor) { +- t->system_bios_major_release = type0.major; +- t->system_bios_minor_release = type0.minor; ++ if (smbios_type0.have_major_minor) { ++ t->system_bios_major_release = smbios_type0.major; ++ t->system_bios_minor_release = smbios_type0.minor; + } else { + t->system_bios_major_release = 0; + t->system_bios_minor_release = 0; +@@ -758,18 +750,18 @@ static void smbios_build_type_1_table(void) + { + SMBIOS_BUILD_TABLE_PRE(1, T1_BASE, true); /* required */ + +- SMBIOS_TABLE_SET_STR(1, manufacturer_str, type1.manufacturer); +- SMBIOS_TABLE_SET_STR(1, product_name_str, type1.product); +- SMBIOS_TABLE_SET_STR(1, version_str, type1.version); +- SMBIOS_TABLE_SET_STR(1, serial_number_str, type1.serial); ++ SMBIOS_TABLE_SET_STR(1, manufacturer_str, 
smbios_type1.manufacturer); ++ SMBIOS_TABLE_SET_STR(1, product_name_str, smbios_type1.product); ++ SMBIOS_TABLE_SET_STR(1, version_str, smbios_type1.version); ++ SMBIOS_TABLE_SET_STR(1, serial_number_str, smbios_type1.serial); + if (qemu_uuid_set) { + smbios_encode_uuid(&t->uuid, &qemu_uuid); + } else { + memset(&t->uuid, 0, 16); + } + t->wake_up_type = 0x06; /* power switch */ +- SMBIOS_TABLE_SET_STR(1, sku_number_str, type1.sku); +- SMBIOS_TABLE_SET_STR(1, family_str, type1.family); ++ SMBIOS_TABLE_SET_STR(1, sku_number_str, smbios_type1.sku); ++ SMBIOS_TABLE_SET_STR(1, family_str, smbios_type1.family); + + SMBIOS_BUILD_TABLE_POST; + } +@@ -1184,12 +1176,12 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + * + * We get 'System Manufacturer' and 'Baseboard Manufacturer' + */ +- SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type1.product, product); +- SMBIOS_SET_DEFAULT(type1.version, version); +- SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ SMBIOS_SET_DEFAULT(smbios_type1.manufacturer, manufacturer); ++ SMBIOS_SET_DEFAULT(smbios_type1.product, product); ++ SMBIOS_SET_DEFAULT(smbios_type1.version, version); ++ SMBIOS_SET_DEFAULT(smbios_type1.family, "Red Hat Enterprise Linux"); + if (stream_version != NULL) { +- SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ SMBIOS_SET_DEFAULT(smbios_type1.sku, stream_version); + } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); + if (stream_product != NULL) { +@@ -1468,13 +1460,13 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + } + + if (header->type <= SMBIOS_MAX_TYPE) { +- if (test_bit(header->type, have_fields_bitmap)) { ++ if (test_bit(header->type, smbios_have_fields_bitmap)) { + error_setg(errp, + "can't load type %d struct, fields already specified!", + header->type); + return; + } +- set_bit(header->type, have_binfile_bitmap); ++ set_bit(header->type, smbios_have_binfile_bitmap); + } + + if (header->type == 4) { +@@ -1505,41 
+1497,42 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + return; + } + +- if (test_bit(type, have_binfile_bitmap)) { ++ if (test_bit(type, smbios_have_binfile_bitmap)) { + error_setg(errp, "can't add fields, binary file already loaded!"); + return; + } +- set_bit(type, have_fields_bitmap); ++ set_bit(type, smbios_have_fields_bitmap); + + switch (type) { + case 0: + if (!qemu_opts_validate(opts, qemu_smbios_type0_opts, errp)) { + return; + } +- save_opt(&type0.vendor, opts, "vendor"); +- save_opt(&type0.version, opts, "version"); +- save_opt(&type0.date, opts, "date"); +- type0.uefi = qemu_opt_get_bool(opts, "uefi", false); ++ save_opt(&smbios_type0.vendor, opts, "vendor"); ++ save_opt(&smbios_type0.version, opts, "version"); ++ save_opt(&smbios_type0.date, opts, "date"); ++ smbios_type0.uefi = qemu_opt_get_bool(opts, "uefi", false); + + val = qemu_opt_get(opts, "release"); + if (val) { +- if (sscanf(val, "%hhu.%hhu", &type0.major, &type0.minor) != 2) { ++ if (sscanf(val, "%hhu.%hhu", &smbios_type0.major, ++ &smbios_type0.minor) != 2) { + error_setg(errp, "Invalid release"); + return; + } +- type0.have_major_minor = true; ++ smbios_type0.have_major_minor = true; + } + return; + case 1: + if (!qemu_opts_validate(opts, qemu_smbios_type1_opts, errp)) { + return; + } +- save_opt(&type1.manufacturer, opts, "manufacturer"); +- save_opt(&type1.product, opts, "product"); +- save_opt(&type1.version, opts, "version"); +- save_opt(&type1.serial, opts, "serial"); +- save_opt(&type1.sku, opts, "sku"); +- save_opt(&type1.family, opts, "family"); ++ save_opt(&smbios_type1.manufacturer, opts, "manufacturer"); ++ save_opt(&smbios_type1.product, opts, "product"); ++ save_opt(&smbios_type1.version, opts, "version"); ++ save_opt(&smbios_type1.serial, opts, "serial"); ++ save_opt(&smbios_type1.sku, opts, "sku"); ++ save_opt(&smbios_type1.family, opts, "family"); + + val = qemu_opt_get(opts, "uuid"); + if (val) { +diff --git a/include/hw/firmware/smbios.h 
b/include/hw/firmware/smbios.h +index d55018e5e3..333de0d5fc 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -2,6 +2,7 @@ + #define QEMU_SMBIOS_H + + #include "qapi/qapi-types-machine.h" ++#include "qemu/bitmap.h" + + /* + * SMBIOS Support +@@ -16,8 +17,23 @@ + * + */ + ++typedef struct { ++ const char *vendor, *version, *date; ++ bool have_major_minor, uefi; ++ uint8_t major, minor; ++} smbios_type0_t; ++extern smbios_type0_t smbios_type0; ++ ++typedef struct { ++ const char *manufacturer, *product, *version, *serial, *sku, *family; ++ /* uuid is in qemu_uuid */ ++} smbios_type1_t; ++extern smbios_type1_t smbios_type1; + + #define SMBIOS_MAX_TYPE 127 ++extern DECLARE_BITMAP(smbios_have_binfile_bitmap, SMBIOS_MAX_TYPE + 1); ++extern DECLARE_BITMAP(smbios_have_fields_bitmap, SMBIOS_MAX_TYPE + 1); ++ + #define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + +-- +2.39.3 + diff --git a/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch b/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch deleted file mode 100644 index f1de158..0000000 --- a/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch +++ /dev/null @@ -1,248 +0,0 @@ -From 00f6e941e75f378c84c773a15efde7dd085d9ce3 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 19:40:14 +0100 -Subject: [PATCH 21/56] spice: move client_migrate_info command to ui/ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [20/50] a587bb001b51a1f9fdf2fcfb0978bb931ae443b6 (peterx/qemu-kvm) - -It has nothing to do with migration, except for the "migrate" in the -name of the command. Move it with the rest of the ui commands. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit f9e1ef7482f1ee289b04f4b45702a1701bc8929d) -Signed-off-by: Peter Xu ---- - migration/migration-hmp-cmds.c | 17 ----------------- - migration/migration.c | 30 ------------------------------ - qapi/migration.json | 28 ---------------------------- - qapi/ui.json | 28 ++++++++++++++++++++++++++++ - ui/ui-hmp-cmds.c | 17 +++++++++++++++++ - ui/ui-qmp-cmds.c | 29 +++++++++++++++++++++++++++++ - 6 files changed, 74 insertions(+), 75 deletions(-) - -diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c -index 71da91967a..4e9f00e7dc 100644 ---- a/migration/migration-hmp-cmds.c -+++ b/migration/migration-hmp-cmds.c -@@ -636,23 +636,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - hmp_handle_error(mon, err); - } - --void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) --{ -- Error *err = NULL; -- const char *protocol = qdict_get_str(qdict, "protocol"); -- const char *hostname = qdict_get_str(qdict, "hostname"); -- bool has_port = qdict_haskey(qdict, "port"); -- int port = qdict_get_try_int(qdict, "port", -1); -- bool has_tls_port = qdict_haskey(qdict, "tls-port"); -- int tls_port = qdict_get_try_int(qdict, "tls-port", -1); -- const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); -- -- qmp_client_migrate_info(protocol, hostname, -- has_port, port, has_tls_port, tls_port, -- cert_subject, &err); -- hmp_handle_error(mon, err); --} -- - void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) - { - Error *err = NULL; -diff --git a/migration/migration.c b/migration/migration.c -index aa96ffdc5b..b745d829a4 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -63,7 +63,6 @@ - #include "sysemu/cpus.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" --#include "ui/qemu-spice.h" - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - -@@ -1018,35 +1017,6 @@ 
MigrationParameters *qmp_query_migrate_parameters(Error **errp) - return params; - } - --void qmp_client_migrate_info(const char *protocol, const char *hostname, -- bool has_port, int64_t port, -- bool has_tls_port, int64_t tls_port, -- const char *cert_subject, -- Error **errp) --{ -- if (strcmp(protocol, "spice") == 0) { -- if (!qemu_using_spice(errp)) { -- return; -- } -- -- if (!has_port && !has_tls_port) { -- error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); -- return; -- } -- -- if (qemu_spice.migrate_info(hostname, -- has_port ? port : -1, -- has_tls_port ? tls_port : -1, -- cert_subject)) { -- error_setg(errp, "Could not set up display for migration"); -- return; -- } -- return; -- } -- -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); --} -- - AnnounceParameters *migrate_announce_params(void) - { - static AnnounceParameters ap; -diff --git a/qapi/migration.json b/qapi/migration.json -index c84fa10e86..2c35b7b9cf 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -1203,34 +1203,6 @@ - { 'command': 'query-migrate-parameters', - 'returns': 'MigrationParameters' } - --## --# @client_migrate_info: --# --# Set migration information for remote display. This makes the server --# ask the client to automatically reconnect using the new parameters --# once migration finished successfully. Only implemented for SPICE. 
--# --# @protocol: must be "spice" --# @hostname: migration target hostname --# @port: spice tcp port for plaintext channels --# @tls-port: spice tcp port for tls-secured channels --# @cert-subject: server certificate subject --# --# Since: 0.14 --# --# Example: --# --# -> { "execute": "client_migrate_info", --# "arguments": { "protocol": "spice", --# "hostname": "virt42.lab.kraxel.org", --# "port": 1234 } } --# <- { "return": {} } --# --## --{ 'command': 'client_migrate_info', -- 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', -- '*tls-port': 'int', '*cert-subject': 'str' } } -- - ## - # @migrate-start-postcopy: - # -diff --git a/qapi/ui.json b/qapi/ui.json -index 98322342f7..7ddd27a932 100644 ---- a/qapi/ui.json -+++ b/qapi/ui.json -@@ -1554,3 +1554,31 @@ - { 'command': 'display-update', - 'data': 'DisplayUpdateOptions', - 'boxed' : true } -+ -+## -+# @client_migrate_info: -+# -+# Set migration information for remote display. This makes the server -+# ask the client to automatically reconnect using the new parameters -+# once migration finished successfully. Only implemented for SPICE. 
-+# -+# @protocol: must be "spice" -+# @hostname: migration target hostname -+# @port: spice tcp port for plaintext channels -+# @tls-port: spice tcp port for tls-secured channels -+# @cert-subject: server certificate subject -+# -+# Since: 0.14 -+# -+# Example: -+# -+# -> { "execute": "client_migrate_info", -+# "arguments": { "protocol": "spice", -+# "hostname": "virt42.lab.kraxel.org", -+# "port": 1234 } } -+# <- { "return": {} } -+# -+## -+{ 'command': 'client_migrate_info', -+ 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', -+ '*tls-port': 'int', '*cert-subject': 'str' } } -diff --git a/ui/ui-hmp-cmds.c b/ui/ui-hmp-cmds.c -index 5c456ecc02..c671389473 100644 ---- a/ui/ui-hmp-cmds.c -+++ b/ui/ui-hmp-cmds.c -@@ -458,3 +458,20 @@ hmp_screendump(Monitor *mon, const QDict *qdict) - end: - hmp_handle_error(mon, err); - } -+ -+void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) -+{ -+ Error *err = NULL; -+ const char *protocol = qdict_get_str(qdict, "protocol"); -+ const char *hostname = qdict_get_str(qdict, "hostname"); -+ bool has_port = qdict_haskey(qdict, "port"); -+ int port = qdict_get_try_int(qdict, "port", -1); -+ bool has_tls_port = qdict_haskey(qdict, "tls-port"); -+ int tls_port = qdict_get_try_int(qdict, "tls-port", -1); -+ const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); -+ -+ qmp_client_migrate_info(protocol, hostname, -+ has_port, port, has_tls_port, tls_port, -+ cert_subject, &err); -+ hmp_handle_error(mon, err); -+} -diff --git a/ui/ui-qmp-cmds.c b/ui/ui-qmp-cmds.c -index dbc4afcd73..a37a7024f3 100644 ---- a/ui/ui-qmp-cmds.c -+++ b/ui/ui-qmp-cmds.c -@@ -175,3 +175,32 @@ void qmp_display_update(DisplayUpdateOptions *arg, Error **errp) - abort(); - } - } -+ -+void qmp_client_migrate_info(const char *protocol, const char *hostname, -+ bool has_port, int64_t port, -+ bool has_tls_port, int64_t tls_port, -+ const char *cert_subject, -+ Error **errp) -+{ -+ if (strcmp(protocol, "spice") == 0) { -+ if 
(!qemu_using_spice(errp)) { -+ return; -+ } -+ -+ if (!has_port && !has_tls_port) { -+ error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); -+ return; -+ } -+ -+ if (qemu_spice.migrate_info(hostname, -+ has_port ? port : -1, -+ has_tls_port ? tls_port : -1, -+ cert_subject)) { -+ error_setg(errp, "Could not set up display for migration"); -+ return; -+ } -+ return; -+ } -+ -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); -+} --- -2.39.1 - diff --git a/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch b/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch new file mode 100644 index 0000000..81ae2f1 --- /dev/null +++ b/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch @@ -0,0 +1,190 @@ +From c5f9e92cd49a2171a5b0223cafd7fab3f45edb82 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 9 Jan 2024 19:17:17 +0100 +Subject: [PATCH 06/22] string-output-visitor: Fix (pseudo) struct handling + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [2/17] 84e226f161680dd61b6635e213203d062c1aa556 (stefanha/centos-stream-qemu-kvm) + +Commit ff32bb53 tried to get minimal struct support into the string +output visitor by just making it return "<omitted>". Unfortunately, it +forgot that the caller will still make more visitor calls for the +content of the struct. + +If the struct is contained in a list, such as IOThreadVirtQueueMapping, +in the better case its fields show up as separate list entries. In the +worse case, it contains another list, and the string output visitor +doesn't support nested lists and asserts that this doesn't happen. So as +soon as the optional "vqs" field in IOThreadVirtQueueMapping is +specified, we get a crash. 
+ +This can be reproduced with the following command line: + + echo "info qtree" | ./qemu-system-x86_64 \ + -object iothread,id=t0 \ + -blockdev null-co,node-name=disk \ + -device '{"driver": "virtio-blk-pci", "drive": "disk", + "iothread-vq-mapping": [{"iothread": "t0", "vqs": [0]}]}' \ + -monitor stdio + +Fix the problem by counting the nesting level of structs and ignoring +any visitor calls for values (apart from start/end_struct) while we're +not on the top level. + +Lists nested directly within lists remain unimplemented, as we don't +currently have a use case for them. + +Fixes: ff32bb53476539d352653f4ed56372dced73a388 +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2069 +Reported-by: Aihua Liang +Signed-off-by: Kevin Wolf +Message-ID: <20240109181717.42493-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 014b99a8e41c8cd1e895137654b44dec5430122c) +Signed-off-by: Stefan Hajnoczi +--- + qapi/string-output-visitor.c | 46 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 46 insertions(+) + +diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c +index f0c1dea89e..5115536b15 100644 +--- a/qapi/string-output-visitor.c ++++ b/qapi/string-output-visitor.c +@@ -65,6 +65,7 @@ struct StringOutputVisitor + } range_start, range_end; + GList *ranges; + void *list; /* Only needed for sanity checking the caller */ ++ unsigned int struct_nesting; + }; + + static StringOutputVisitor *to_sov(Visitor *v) +@@ -144,6 +145,10 @@ static bool print_type_int64(Visitor *v, const char *name, int64_t *obj, + StringOutputVisitor *sov = to_sov(v); + GList *l; + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + switch (sov->list_mode) { + case LM_NONE: + string_output_append(sov, *obj); +@@ -231,6 +236,10 @@ static bool print_type_size(Visitor *v, const char *name, uint64_t *obj, + uint64_t val; + char *out, *psize; + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + if (!sov->human) { + out 
= g_strdup_printf("%"PRIu64, *obj); + string_output_set(sov, out); +@@ -250,6 +259,11 @@ static bool print_type_bool(Visitor *v, const char *name, bool *obj, + Error **errp) + { + StringOutputVisitor *sov = to_sov(v); ++ ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + string_output_set(sov, g_strdup(*obj ? "true" : "false")); + return true; + } +@@ -260,6 +274,10 @@ static bool print_type_str(Visitor *v, const char *name, char **obj, + StringOutputVisitor *sov = to_sov(v); + char *out; + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + if (sov->human) { + out = *obj ? g_strdup_printf("\"%s\"", *obj) : g_strdup(""); + } else { +@@ -273,6 +291,11 @@ static bool print_type_number(Visitor *v, const char *name, double *obj, + Error **errp) + { + StringOutputVisitor *sov = to_sov(v); ++ ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + string_output_set(sov, g_strdup_printf("%.17g", *obj)); + return true; + } +@@ -283,6 +306,10 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, + StringOutputVisitor *sov = to_sov(v); + char *out; + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + if (sov->human) { + out = g_strdup(""); + } else { +@@ -295,6 +322,9 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, + static bool start_struct(Visitor *v, const char *name, void **obj, + size_t size, Error **errp) + { ++ StringOutputVisitor *sov = to_sov(v); ++ ++ sov->struct_nesting++; + return true; + } + +@@ -302,6 +332,10 @@ static void end_struct(Visitor *v, void **obj) + { + StringOutputVisitor *sov = to_sov(v); + ++ if (--sov->struct_nesting) { ++ return; ++ } ++ + /* TODO actually print struct fields */ + string_output_set(sov, g_strdup("")); + } +@@ -312,6 +346,10 @@ start_list(Visitor *v, const char *name, GenericList **list, size_t size, + { + StringOutputVisitor *sov = to_sov(v); + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + /* we can't traverse a list in a list */ + assert(sov->list_mode == 
LM_NONE); + /* We don't support visits without a list */ +@@ -329,6 +367,10 @@ static GenericList *next_list(Visitor *v, GenericList *tail, size_t size) + StringOutputVisitor *sov = to_sov(v); + GenericList *ret = tail->next; + ++ if (sov->struct_nesting) { ++ return ret; ++ } ++ + if (ret && !ret->next) { + sov->list_mode = LM_END; + } +@@ -339,6 +381,10 @@ static void end_list(Visitor *v, void **obj) + { + StringOutputVisitor *sov = to_sov(v); + ++ if (sov->struct_nesting) { ++ return; ++ } ++ + assert(sov->list == obj); + assert(sov->list_mode == LM_STARTED || + sov->list_mode == LM_END || +-- +2.39.3 + diff --git a/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch b/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch new file mode 100644 index 0000000..f83635d --- /dev/null +++ b/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch @@ -0,0 +1,90 @@ +From fb2069be402ec1322834c555714f0e993778cc9d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 12 Dec 2023 08:49:34 -0500 +Subject: [PATCH 05/22] string-output-visitor: show structs as "<omitted>" + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/17] 0c08e8237d28fbdbdbc7576d4c17d2eeeb413c2a (stefanha/centos-stream-qemu-kvm) + +StringOutputVisitor crashes when it visits a struct because +->start_struct() is NULL. + +Show "<omitted>" instead of crashing. This is necessary because the +virtio-blk-pci iothread-vq-mapping parameter that I'd like to introduce +soon is a list of IOThreadMapping structs. + +This patch is a quick fix to solve the crash, but the long-term solution +is replacing StringOutputVisitor with something that can handle the full +gamut of values in QEMU. 
+ +Cc: Markus Armbruster +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231212134934.500289-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Markus Armbruster +Signed-off-by: Kevin Wolf +(cherry picked from commit ff32bb53476539d352653f4ed56372dced73a388) +Signed-off-by: Stefan Hajnoczi +--- + include/qapi/string-output-visitor.h | 6 +++--- + qapi/string-output-visitor.c | 16 ++++++++++++++++ + 2 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/include/qapi/string-output-visitor.h b/include/qapi/string-output-visitor.h +index 268dfe9986..b1ee473b30 100644 +--- a/include/qapi/string-output-visitor.h ++++ b/include/qapi/string-output-visitor.h +@@ -26,9 +26,9 @@ typedef struct StringOutputVisitor StringOutputVisitor; + * If everything else succeeds, pass @result to visit_complete() to + * collect the result of the visit. + * +- * The string output visitor does not implement support for visiting +- * QAPI structs, alternates, null, or arbitrary QTypes. It also +- * requires a non-null list argument to visit_start_list(). ++ * The string output visitor does not implement support for alternates, null, ++ * or arbitrary QTypes. Struct fields are not shown. It also requires a ++ * non-null list argument to visit_start_list(). 
+ */ + Visitor *string_output_visitor_new(bool human, char **result); + +diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c +index c0cb72dbe4..f0c1dea89e 100644 +--- a/qapi/string-output-visitor.c ++++ b/qapi/string-output-visitor.c +@@ -292,6 +292,20 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, + return true; + } + ++static bool start_struct(Visitor *v, const char *name, void **obj, ++ size_t size, Error **errp) ++{ ++ return true; ++} ++ ++static void end_struct(Visitor *v, void **obj) ++{ ++ StringOutputVisitor *sov = to_sov(v); ++ ++ /* TODO actually print struct fields */ ++ string_output_set(sov, g_strdup("")); ++} ++ + static bool + start_list(Visitor *v, const char *name, GenericList **list, size_t size, + Error **errp) +@@ -379,6 +393,8 @@ Visitor *string_output_visitor_new(bool human, char **result) + v->visitor.type_str = print_type_str; + v->visitor.type_number = print_type_number; + v->visitor.type_null = print_type_null; ++ v->visitor.start_struct = start_struct; ++ v->visitor.end_struct = end_struct; + v->visitor.start_list = start_list; + v->visitor.next_list = next_list; + v->visitor.end_list = end_list; +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch b/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch deleted file mode 100644 index 43c239a..0000000 --- a/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch +++ /dev/null @@ -1,203 +0,0 @@ -From 03011d00cfb5862edb7394a9b79b269198af5c89 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:48:34 -0400 -Subject: [PATCH 7/7] target/i386: Add EPYC-Genoa model to support Zen 4 - processor series - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/7] 158091c691169a5d30c7c8005371ee7a0d9fc4ce (bdas1/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 166b1741884dd4fd7090b753cd7333868457a29b -Author: Babu Moger -Date: Thu May 4 15:53:12 2023 -0500 - - target/i386: Add EPYC-Genoa model to support Zen 4 processor series - - Adds the support for AMD EPYC Genoa generation processors. The model - display for the new processor will be EPYC-Genoa. - - Adds the following new feature bits on top of the feature bits from - the previous generation EPYC models. - - avx512f : AVX-512 Foundation instruction - avx512dq : AVX-512 Doubleword & Quadword Instruction - avx512ifma : AVX-512 Integer Fused Multiply Add instruction - avx512cd : AVX-512 Conflict Detection instruction - avx512bw : AVX-512 Byte and Word Instructions - avx512vl : AVX-512 Vector Length Extension Instructions - avx512vbmi : AVX-512 Vector Byte Manipulation Instruction - avx512_vbmi2 : AVX-512 Additional Vector Byte Manipulation Instruction - gfni : AVX-512 Galois Field New Instructions - avx512_vnni : AVX-512 Vector Neural Network Instructions - avx512_bitalg : AVX-512 Bit Algorithms, add bit algorithms Instructions - avx512_vpopcntdq: AVX-512 AVX-512 Vector Population Count Doubleword and - Quadword Instructions - avx512_bf16 : AVX-512 BFLOAT16 instructions - la57 : 57-bit virtual address support (5-level Page Tables) - vnmi : Virtual NMI (VNMI) allows the hypervisor to inject the NMI - into the guest without using Event Injection mechanism - meaning not required to track the guest NMI and intercepting - the IRET. - auto-ibrs : The AMD Zen4 core supports a new feature called Automatic IBRS. - It is a "set-and-forget" feature that means that, unlike e.g., - s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation - resources automatically across CPL transitions. 
- - Signed-off-by: Babu Moger - Message-Id: <20230504205313.225073-8-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 122 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index f1baefe775..b27db050a2 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1973,6 +1973,56 @@ static const CPUCaches epyc_milan_v2_cache_info = { - }, - }; - -+static const CPUCaches epyc_genoa_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 1 * MiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 2048, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4493,6 +4543,78 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .name = "EPYC-Genoa", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 25, -+ .model = 17, -+ .stepping = 0, -+ .features[FEAT_1_EDX] = -+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | -+ CPUID_PSE36 | 
CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | -+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | -+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | -+ CPUID_VME | CPUID_FP87, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | -+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | -+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | -+ CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | -+ CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | -+ CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | -+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | -+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | -+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, -+ .features[FEAT_8000_0008_EBX] = -+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | -+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | -+ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | -+ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | -+ CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, -+ .features[FEAT_8000_0021_EAX] = -+ CPUID_8000_0021_EAX_No_NESTED_DATA_BP | -+ CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | -+ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | -+ CPUID_8000_0021_EAX_AUTO_IBRS, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | -+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | -+ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | -+ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | -+ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | -+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, -+ 
.features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | -+ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | -+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | -+ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | -+ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | -+ CPUID_7_0_ECX_RDPID, -+ .features[FEAT_7_0_EDX] = -+ CPUID_7_0_EDX_FSRM, -+ .features[FEAT_7_1_EAX] = -+ CPUID_7_1_EAX_AVX512_BF16, -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .features[FEAT_SVM] = -+ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | -+ CPUID_SVM_SVME_ADDR_CHK, -+ .xlevel = 0x80000022, -+ .model_id = "AMD EPYC-Genoa Processor", -+ .cache_info = &epyc_genoa_cache_info, -+ }, - }; - - /* --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch b/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch deleted file mode 100644 index 5e8f79b..0000000 --- a/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 95c5cee20741b055dea9ac3ad3176bbaa1eaf705 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:46:25 -0400 -Subject: [PATCH 6/7] target/i386: Add VNMI and automatic IBRS feature bits -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/7] 24c0fb08973aa2615817f67576550ce2efadb75c (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 62a798d4bc2c3e767d94670776c77a7df274d7c5 -Author: Babu Moger -Date: Thu May 4 15:53:11 2023 -0500 - - target/i386: Add VNMI and automatic IBRS feature bits - - Add the following featute bits. 
- - vnmi: Virtual NMI (VNMI) allows the hypervisor to inject the NMI into the - guest without using Event Injection mechanism meaning not required to - track the guest NMI and intercepting the IRET. - The presence of this feature is indicated via the CPUID function - 0x8000000A_EDX[25]. - - automatic-ibrs : - The AMD Zen4 core supports a new feature called Automatic IBRS. - It is a "set-and-forget" feature that means that, unlike e.g., - s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation - resources automatically across CPL transitions. - The presence of this feature is indicated via the CPUID function - 0x80000021_EAX[8]. - - The documention for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Santosh Shukla - Signed-off-by: Kim Phillips - Signed-off-by: Babu Moger - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-7-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - target/i386/cpu.h | 3 +++ - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index bbddc682df..f1baefe775 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -806,7 +806,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "pfthreshold", "avic", NULL, "v-vmsave-vmload", - "vgif", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ NULL, "vnmi", NULL, NULL, - "svme-addr-chk", NULL, NULL, NULL, - }, - .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, -@@ -925,7 +925,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", 
NULL, - NULL, NULL, "null-sel-clr-base", NULL, -- NULL, NULL, NULL, NULL, -+ "auto-ibrs", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index c37abf62ae..f7d225e4f1 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -773,6 +773,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_SVM_AVIC (1U << 13) - #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) - #define CPUID_SVM_VGIF (1U << 16) -+#define CPUID_SVM_VNMI (1U << 25) - #define CPUID_SVM_SVME_ADDR_CHK (1U << 28) - - /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ -@@ -948,6 +949,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) - /* Null Selector Clears Base */ - #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) -+/* Automatic IBRS */ -+#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch b/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch deleted file mode 100644 index 772bbbd..0000000 --- a/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 2d7fb99c02a7666f1d8fe70a4749f0b7771a68ed Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:29:55 -0400 -Subject: [PATCH 3/7] target/i386: Add a couple of feature bits in - 8000_0008_EBX - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/7] b11020b249d4ecc2e3e1ddf4fdc4b52c42ec2642 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit bb039a230e6a7920d71d21fa9afee2653a678c48 -Author: Babu Moger -Date: Thu May 4 15:53:08 2023 -0500 - 
- target/i386: Add a couple of feature bits in 8000_0008_EBX - - Add the following feature bits. - - amd-psfd : Predictive Store Forwarding Disable: - PSF is a hardware-based micro-architectural optimization - designed to improve the performance of code execution by - predicting address dependencies between loads and stores. - While SSBD (Speculative Store Bypass Disable) disables both - PSF and speculative store bypass, PSFD only disables PSF. - PSFD may be desirable for the software which is concerned - with the speculative behavior of PSF but desires a smaller - performance impact than setting SSBD. - Depends on the following kernel commit: - b73a54321ad8 ("KVM: x86: Expose Predictive Store Forwarding Disable") - - stibp-always-on : - Single Thread Indirect Branch Prediction mode has enhanced - performance and may be left always on. - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Message-Id: <20230504205313.225073-4-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - target/i386/cpu.h | 4 ++++ - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 8aa7eb611c..c8f88aefc7 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -911,10 +911,10 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - NULL, NULL, NULL, NULL, - NULL, "wbnoinvd", NULL, NULL, - "ibpb", NULL, "ibrs", "amd-stibp", -- NULL, NULL, NULL, NULL, -+ NULL, "stibp-always-on", NULL, NULL, - NULL, NULL, NULL, NULL, - "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, -- NULL, NULL, NULL, NULL, -+ "amd-psfd", NULL, NULL, NULL, - }, - .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, - .tcg_features = 0, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index c28b9df217..81d2200543 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -934,8 +934,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_8000_0008_EBX_IBRS (1U << 14) - /* Single Thread Indirect Branch Predictors */ - #define CPUID_8000_0008_EBX_STIBP (1U << 15) -+/* STIBP mode has enhanced performance and may be left always on */ -+#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) - /* Speculative Store Bypass Disable */ - #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) -+/* Predictive Store Forwarding Disable */ -+#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch b/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch deleted file mode 100644 index c714e49..0000000 --- 
a/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 2a2f74c53258ef67034307b59afe2f4c679afaa2 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:32:00 -0400 -Subject: [PATCH 4/7] target/i386: Add feature bits for CPUID_Fn80000021_EAX -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/7] 133044a7245226308406a684a875e1f96a394516 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit b70eec312b185197d639bff689007727e596afd1 -Author: Babu Moger -Date: Thu May 4 15:53:09 2023 -0500 - - target/i386: Add feature bits for CPUID_Fn80000021_EAX - - Add the following feature bits. - no-nested-data-bp : Processor ignores nested data breakpoints. - lfence-always-serializing : LFENCE instruction is always serializing. - null-sel-cls-base : Null Selector Clears Base. When this bit is - set, a null segment load clears the segment base. - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-5-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 24 ++++++++++++++++++++++++ - target/i386/cpu.h | 8 ++++++++ - 2 files changed, 32 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index c8f88aefc7..7ddebbaa3c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -920,6 +920,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .tcg_features = 0, - .unmigratable_flags = 0, - }, -+ [FEAT_8000_0021_EAX] = { -+ .type = CPUID_FEATURE_WORD, -+ .feat_names = { -+ "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, -+ NULL, NULL, "null-sel-clr-base", NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, -+ .tcg_features = 0, -+ .unmigratable_flags = 0, -+ }, - [FEAT_XSAVE] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { -@@ -6156,6 +6172,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; -+ case 0x80000021: -+ *eax = env->features[FEAT_8000_0021_EAX]; -+ *ebx = *ecx = *edx = 0; -+ break; - default: - /* reserved values: zero */ - *eax = 0; -@@ -6585,6 +6605,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); - } - -+ if (env->features[FEAT_8000_0021_EAX]) { -+ x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); -+ } -+ - /* SGX requires CPUID[0x12] for EPC enumeration */ - if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { - x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); -diff --git a/target/i386/cpu.h 
b/target/i386/cpu.h -index 81d2200543..c37abf62ae 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -600,6 +600,7 @@ typedef enum FeatureWord { - FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ - FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ - FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ -+ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ - FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ - FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ - FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ -@@ -941,6 +942,13 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - /* Predictive Store Forwarding Disable */ - #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) - -+/* Processor ignores nested data breakpoints */ -+#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) -+/* LFENCE is always serializing */ -+#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) -+/* Null Selector Clears Base */ -+#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) -+ - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) - #define CPUID_XSAVE_XGETBV1 (1U << 2) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch b/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch deleted file mode 100644 index 9bb4bf9..0000000 --- a/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch +++ /dev/null @@ -1,152 +0,0 @@ -From a8180665019d537ee9775614627bf9eb8bd4770e Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:35:33 -0400 -Subject: [PATCH 5/7] target/i386: Add missing feature bits in EPYC-Milan model -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/7] 8f77315c8d7010564423df3e3c594c90fd5f9c00 (bdas1/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 27f03be6f59d04bd5673ba1e1628b2b490f9a9ff -Author: Babu Moger -Date: Thu May 4 15:53:10 2023 -0500 - - target/i386: Add missing feature bits in EPYC-Milan model - - Add the following feature bits for EPYC-Milan model and bump the version. - vaes : Vector VAES(ENC|DEC), VAES(ENC|DEC)LAST instruction support - vpclmulqdq : Vector VPCLMULQDQ instruction support - stibp-always-on : Single Thread Indirect Branch Prediction Mode has enhanced - performance and may be left Always on - amd-psfd : Predictive Store Forward Disable - no-nested-data-bp : Processor ignores nested data breakpoints - lfence-always-serializing : LFENCE instruction is always serializing - null-sel-clr-base : Null Selector Clears Base. When this bit is - set, a null segment load clears the segment base - - These new features will be added in EPYC-Milan-v2. The "-cpu help" output - after the change will be. - - x86 EPYC-Milan (alias configured by machine type) - x86 EPYC-Milan-v1 AMD EPYC-Milan Processor - x86 EPYC-Milan-v2 AMD EPYC-Milan Processor - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING - c. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-6-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 70 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 7ddebbaa3c..bbddc682df 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1923,6 +1923,56 @@ static const CPUCaches epyc_milan_cache_info = { - }, - }; - -+static const CPUCaches epyc_milan_v2_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4422,6 +4472,26 @@ static const X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x8000001E, - .model_id = "AMD EPYC-Milan 
Processor", - .cache_info = &epyc_milan_cache_info, -+ .versions = (X86CPUVersionDefinition[]) { -+ { .version = 1 }, -+ { -+ .version = 2, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-Milan-v2 Processor" }, -+ { "vaes", "on" }, -+ { "vpclmulqdq", "on" }, -+ { "stibp-always-on", "on" }, -+ { "amd-psfd", "on" }, -+ { "no-nested-data-bp", "on" }, -+ { "lfence-always-serializing", "on" }, -+ { "null-sel-clr-base", "on" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_milan_v2_cache_info -+ }, -+ { /* end of list */ } -+ } - }, - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch b/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch deleted file mode 100644 index 40c289a..0000000 --- a/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 92f0b5d0c7a841a21cabbc6efc1d7baf0e5a3e0f Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:26:12 -0400 -Subject: [PATCH 2/7] target/i386: Add new EPYC CPU versions with updated - cache_info - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/7] 71a2fd907636733f86729bc9328600f6f9306eaf (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit d7c72735f618a7ee27ee109d8b1468193734606a -Author: Michael Roth -Date: Thu May 4 15:53:07 2023 -0500 - - target/i386: Add new EPYC CPU versions with updated cache_info - - Introduce new EPYC cpu versions: EPYC-v4 and EPYC-Rome-v3. - The only difference vs. older models is an updated cache_info with - the 'complex_indexing' bit unset, since this bit is not currently - defined for AMD and may cause problems should it be used for - something else in the future. Setting this bit will also cause - CPUID validation failures when running SEV-SNP guests. 
- - Signed-off-by: Michael Roth - Signed-off-by: Babu Moger - Acked-by: Michael S. Tsirkin - Message-Id: <20230504205313.225073-3-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 118 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 3558c92ed0..8aa7eb611c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1707,6 +1707,56 @@ static const CPUCaches epyc_cache_info = { - }, - }; - -+static CPUCaches epyc_v4_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 64 * KiB, -+ .line_size = 64, -+ .associativity = 4, -+ .partitions = 1, -+ .sets = 256, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 8 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 8192, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - static const CPUCaches epyc_rome_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -1757,6 +1807,56 @@ static const CPUCaches epyc_rome_cache_info = { - }, - }; - -+static const CPUCaches epyc_rome_v3_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ 
.lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 16 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 16384, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - static const CPUCaches epyc_milan_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -4112,6 +4212,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .version = 4, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-v4 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_v4_cache_info -+ }, - { /* end of list */ } - } - }, -@@ -4231,6 +4340,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .version = 3, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-Rome-v3 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_rome_v3_cache_info -+ }, - { /* end of list */ } - } - }, --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch deleted file mode 100644 index 2b1cbc9..0000000 --- a/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 0d056d6da9e4147d5965bf3507f6d6d6a413924d Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Wed, 24 May 2023 
06:52:43 -0400 -Subject: [PATCH 2/5] target/i386: add support for FB_CLEAR feature - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature -RH-Bugzilla: 2216201 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/2] 5f191964ba25754107a06ef907f4ac614280aaa1 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216201 - -commit 22e1094ca82d5518c1b69aff3e87c550776ae1eb -Author: Emanuele Giuseppe Esposito -Date: Wed Feb 1 08:57:59 2023 -0500 - - target/i386: add support for FB_CLEAR feature - - As reported by the Intel's doc: - "FB_CLEAR: The processor will overwrite fill buffer values as part of - MD_CLEAR operations with the VERW instruction. - On these processors, L1D_FLUSH does not overwrite fill buffer values." - - If this cpu feature is present in host, allow QEMU to choose whether to - show it to the guest too. - One disadvantage of not exposing it is that the guest will report - a non existing vulnerability in - /sys/devices/system/cpu/vulnerabilities/mmio_stale_data - because the mitigation is present only when the cpu has - (FLUSH_L1D and MD_CLEAR) or FB_CLEAR - features enabled. 
- - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20230201135759.555607-3-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index caf6338cc0..839706b430 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1012,7 +1012,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", - "taa-no", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ NULL, "fb-clear", NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 74fa649b60..c28b9df217 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -989,6 +989,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) - #define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) - #define MSR_ARCH_CAP_TAA_NO (1U << 8) -+#define MSR_ARCH_CAP_FB_CLEAR (1U << 17) - - #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) - --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch deleted file mode 100644 index 39f2542..0000000 --- a/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 14eae569030805680570d93412100ad26242c7e6 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Wed, 24 May 2023 06:52:34 -0400 -Subject: [PATCH 1/5] target/i386: add support for FLUSH_L1D feature - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature -RH-Bugzilla: 2216201 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/2] e296c75c5cd7e1d16d3c70483d52aeba9f9eb2cd (eesposit/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2216201 - -commit 0e7e3bf1a552c178924867fa7c2f30ccc8a179e0 -Author: Emanuele Giuseppe Esposito -Date: Wed Feb 1 08:57:58 2023 -0500 - - target/i386: add support for FLUSH_L1D feature - - As reported by Intel's doc: - "L1D_FLUSH: Writeback and invalidate the L1 data cache" - - If this cpu feature is present in host, allow QEMU to choose whether to - show it to the guest too. - One disadvantage of not exposing it is that the guest will report - a non existing vulnerability in - /sys/devices/system/cpu/vulnerabilities/mmio_stale_data - because the mitigation is present only when the cpu has - (FLUSH_L1D and MD_CLEAR) or FB_CLEAR - features enabled. - - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20230201135759.555607-2-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 2 ++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 0ef2bf1b93..caf6338cc0 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -860,7 +860,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "tsx-ldtrk", NULL, NULL /* pconfig */, "arch-lbr", - NULL, NULL, "amx-bf16", "avx512-fp16", - "amx-tile", "amx-int8", "spec-ctrl", "stibp", -- NULL, "arch-capabilities", "core-capability", "ssbd", -+ "flush-l1d", "arch-capabilities", "core-capability", "ssbd", - }, - .cpuid = { - .eax = 7, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index d243e290d3..74fa649b60 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -896,6 +896,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) - /* Single Thread Indirect Branch Predictors */ - #define CPUID_7_0_EDX_STIBP (1U << 27) -+/* Flush L1D cache */ -+#define CPUID_7_0_EDX_FLUSH_L1D (1U << 28) - /* Arch Capabilities */ - #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) 
- /* Core Capability */ --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch b/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch deleted file mode 100644 index 2c81c72..0000000 --- a/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 457e74c076e0fe7b64631dfd4369d167f0762c9a Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:22:41 -0400 -Subject: [PATCH 1/7] target/i386: allow versioned CPUs to specify new - cache_info - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/7] 6070e07a4bb070d1c15a811b2bd3195929c18d61 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit cca0a000d06f897411a8af4402e5d0522bbe450b -Author: Michael Roth -Date: Thu May 4 15:53:06 2023 -0500 - - target/i386: allow versioned CPUs to specify new cache_info - - New EPYC CPUs versions require small changes to their cache_info's. - Because current QEMU x86 CPU definition does not support versioned - cach_info, we would have to declare a new CPU type for each such case. - To avoid the dup work, add "cache_info" in X86CPUVersionDefinition", - to allow new cache_info pointers to be specified for a new CPU version. - - Co-developed-by: Wei Huang - Signed-off-by: Wei Huang - Signed-off-by: Michael Roth - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Message-Id: <20230504205313.225073-2-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 35 ++++++++++++++++++++++++++++++++--- - 1 file changed, 32 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 4ac3046313..3558c92ed0 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1598,6 +1598,7 @@ typedef struct X86CPUVersionDefinition { - const char *alias; - const char *note; - PropValue *props; -+ const CPUCaches *const cache_info; - } X86CPUVersionDefinition; - - /* Base definition for a CPU model */ -@@ -5213,6 +5214,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) - assert(vdef->version == version); - } - -+static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, -+ X86CPUModel *model) -+{ -+ const X86CPUVersionDefinition *vdef; -+ X86CPUVersion version = x86_cpu_model_resolve_version(model); -+ const CPUCaches *cache_info = model->cpudef->cache_info; -+ -+ if (version == CPU_VERSION_LEGACY) { -+ return cache_info; -+ } -+ -+ for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { -+ if (vdef->cache_info) { -+ cache_info = vdef->cache_info; -+ } -+ -+ if (vdef->version == version) { -+ break; -+ } -+ } -+ -+ assert(vdef->version == version); -+ return cache_info; -+} -+ - /* - * Load data from X86CPUDefinition into a X86CPU object. - * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
-@@ -5245,7 +5271,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) - } - - /* legacy-cache defaults to 'off' if CPU model provides cache info */ -- cpu->legacy_cache = !def->cache_info; -+ cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); - - env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; - -@@ -6724,14 +6750,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - - /* Cache information initialization */ - if (!cpu->legacy_cache) { -- if (!xcc->model || !xcc->model->cpudef->cache_info) { -+ const CPUCaches *cache_info = -+ x86_cpu_get_versioned_cache_info(cpu, xcc->model); -+ -+ if (!xcc->model || !cache_info) { - g_autofree char *name = x86_cpu_class_get_model_name(xcc); - error_setg(errp, - "CPU model '%s' doesn't support legacy-cache=off", name); - return; - } - env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = -- *xcc->model->cpudef->cache_info; -+ *cache_info; - } else { - /* Build legacy cache information */ - env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; --- -2.39.3 - diff --git a/SOURCES/kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch b/SOURCES/kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch deleted file mode 100644 index f8f3083..0000000 --- a/SOURCES/kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch +++ /dev/null @@ -1,84 +0,0 @@ -From a84f9954b3f3607d34661b221a72677d81743a5b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 21 Nov 2023 16:36:26 +0100 -Subject: [PATCH 3/3] target/s390x/arch_dump: Add arch cleanup function for PV - dumps -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 325: Fix problem that secure execution guest remains in "paused" state after dump failure -RH-Jira: RHEL-16997 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 0b3a9a6e992a615d96e7e9978a6b849b17ca69b6 - -JIRA: 
https://issues.redhat.com/browse/RHEL-16997 - -commit d12a91e0baafce7b1cbacff7cf9339eeb0011732 -Author: Janosch Frank -Date: Thu Nov 9 12:04:43 2023 +0000 - - target/s390x/arch_dump: Add arch cleanup function for PV dumps - - PV dumps block vcpu runs until dump end is reached. If there's an - error between PV dump init and PV dump end the vm will never be able - to run again. One example of such an error is insufficient disk space - for the dump file. - - Let's add a cleanup function that tries to do a dump end. The dump - completion data is discarded but there's no point in writing it to a - file anyway if there's a possibility that other PV dump data is - missing. - - Signed-off-by: Janosch Frank - Reviewed-by: Thomas Huth - Reviewed-by: Claudio Imbrenda - Reviewed-by: Marc-André Lureau - Message-ID: <20231109120443.185979-4-frankja@linux.ibm.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - target/s390x/arch_dump.c | 17 +++++++++++++++++ - 1 file changed, 17 insertions(+) - -diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c -index f6af8f780a..de0b3d7d84 100644 ---- a/target/s390x/arch_dump.c -+++ b/target/s390x/arch_dump.c -@@ -433,6 +433,22 @@ static int arch_sections_write(DumpState *s, uint8_t *buff) - return 0; - } - -+static void arch_cleanup(DumpState *s) -+{ -+ g_autofree uint8_t *buff = NULL; -+ int rc; -+ -+ if (!pv_dump_initialized) { -+ return; -+ } -+ -+ buff = g_malloc(kvm_s390_pv_dmp_get_size_completion_data()); -+ rc = kvm_s390_dump_completion_data(buff); -+ if (!rc) { -+ pv_dump_initialized = false; -+ } -+} -+ - int cpu_get_dump_info(ArchDumpInfo *info, - const struct GuestPhysBlockList *guest_phys_blocks) - { -@@ -448,6 +464,7 @@ int cpu_get_dump_info(ArchDumpInfo *info, - info->arch_sections_add_fn = *arch_sections_add; - info->arch_sections_write_hdr_fn = *arch_sections_write_hdr; - info->arch_sections_write_fn = *arch_sections_write; -+ info->arch_cleanup_fn = *arch_cleanup; - } - return 0; - } --- -2.39.3 
- diff --git a/SOURCES/kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch b/SOURCES/kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch deleted file mode 100644 index df69915..0000000 --- a/SOURCES/kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch +++ /dev/null @@ -1,56 +0,0 @@ -From b7e726278fe5564ed7f1d9e9fb15b88a4dcd57ef Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 21 Nov 2023 16:36:26 +0100 -Subject: [PATCH 1/3] target/s390x/dump: Remove unneeded dump info function - pointer init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 325: Fix problem that secure execution guest remains in "paused" state after dump failure -RH-Jira: RHEL-16997 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] d7c935ffff9722d27fb47486976719d566a71810 - -JIRA: https://issues.redhat.com/browse/RHEL-16997 - -commit 816644b1219900875f47d7adf9bfb283f1b29aa0 -Author: Janosch Frank -Date: Thu Nov 9 12:04:41 2023 +0000 - - target/s390x/dump: Remove unneeded dump info function pointer init - - dump_state_prepare() now sets the function pointers to NULL so we only - need to touch them if we're going to use them. 
- - Signed-off-by: Janosch Frank - Reviewed-by: Marc-André Lureau - Reviewed-by: Thomas Huth - Message-ID: <20231109120443.185979-2-frankja@linux.ibm.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - target/s390x/arch_dump.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c -index cb98f4894d..f6af8f780a 100644 ---- a/target/s390x/arch_dump.c -+++ b/target/s390x/arch_dump.c -@@ -448,10 +448,6 @@ int cpu_get_dump_info(ArchDumpInfo *info, - info->arch_sections_add_fn = *arch_sections_add; - info->arch_sections_write_hdr_fn = *arch_sections_write_hdr; - info->arch_sections_write_fn = *arch_sections_write; -- } else { -- info->arch_sections_add_fn = NULL; -- info->arch_sections_write_hdr_fn = NULL; -- info->arch_sections_write_fn = NULL; - } - return 0; - } --- -2.39.3 - diff --git a/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch b/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch new file mode 100644 index 0000000..a2d712f --- /dev/null +++ b/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch @@ -0,0 +1,205 @@ +From cc8d794932e26df7c7f3c8cc0c1f42da8d52f12b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Jan 2024 10:26:52 +0100 +Subject: [PATCH 069/101] target/s390x/kvm/pv: Provide some more useful + information if decryption fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 213: s390x: Provide some more useful information if decryption of a PV image fails +RH-Jira: RHEL-18212 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] 4ffb61869f7df33e23d3e0ebf8c29e386e3f6cbc (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-18212 + +commit 7af51621b16ae86646cc2dc9dee30de8176ff761 +Author: Thomas Huth +Date: Wed Jan 10 15:29:16 2024 +0100 + + 
target/s390x/kvm/pv: Provide some more useful information if decryption fails + + It's a common scenario to copy guest images from one host to another + to run the guest on the other machine. This (of course) does not work + with "secure execution" guests since they are encrypted with one certain + host key. However, if you still (accidentally) do it, you only get a + very user-unfriendly error message that looks like this: + + qemu-system-s390x: KVM PV command 2 (KVM_PV_SET_SEC_PARMS) failed: + header rc 108 rrc 5 IOCTL rc: -22 + + Let's provide at least a somewhat nicer hint to the users so that they + are able to figure out what might have gone wrong. + + Buglink: https://issues.redhat.com/browse/RHEL-18212 + Message-ID: <20240110142916.850605-1-thuth@redhat.com> + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Reviewed-by: Claudio Imbrenda + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + hw/s390x/ipl.c | 5 ++--- + hw/s390x/ipl.h | 2 +- + hw/s390x/s390-virtio-ccw.c | 5 ++++- + target/s390x/kvm/pv.c | 25 ++++++++++++++++++++----- + target/s390x/kvm/pv.h | 5 +++-- + 5 files changed, 30 insertions(+), 12 deletions(-) + +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index 515dcf51b5..b23a6a0ef3 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -703,7 +703,7 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) + cpu_physical_memory_unmap(addr, len, 1, len); + } + +-int s390_ipl_prepare_pv_header(void) ++int s390_ipl_prepare_pv_header(Error **errp) + { + IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); + IPLBlockPV *ipib_pv = &ipib->pv; +@@ -712,8 +712,7 @@ int s390_ipl_prepare_pv_header(void) + + cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, + ipib_pv->pv_header_len); +- rc = s390_pv_set_sec_parms((uintptr_t)hdr, +- ipib_pv->pv_header_len); ++ rc = s390_pv_set_sec_parms((uintptr_t)hdr, ipib_pv->pv_header_len, errp); + g_free(hdr); + return rc; + } +diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h +index 
7fc86e7905..57cd125769 100644 +--- a/hw/s390x/ipl.h ++++ b/hw/s390x/ipl.h +@@ -107,7 +107,7 @@ typedef union IplParameterBlock IplParameterBlock; + + int s390_ipl_set_loadparm(uint8_t *loadparm); + void s390_ipl_update_diag308(IplParameterBlock *iplb); +-int s390_ipl_prepare_pv_header(void); ++int s390_ipl_prepare_pv_header(Error **errp); + int s390_ipl_pv_unpack(void); + void s390_ipl_prepare_cpu(S390CPU *cpu); + IplParameterBlock *s390_ipl_get_iplb(void); +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 984891b82a..e26ce26f5a 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -391,7 +391,7 @@ static int s390_machine_protect(S390CcwMachineState *ms) + } + + /* Set SE header and unpack */ +- rc = s390_ipl_prepare_pv_header(); ++ rc = s390_ipl_prepare_pv_header(&local_err); + if (rc) { + goto out_err; + } +@@ -410,6 +410,9 @@ static int s390_machine_protect(S390CcwMachineState *ms) + return rc; + + out_err: ++ if (local_err) { ++ error_report_err(local_err); ++ } + s390_machine_unprotect(ms); + return rc; + } +diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c +index 6a69be7e5c..7ca7faec73 100644 +--- a/target/s390x/kvm/pv.c ++++ b/target/s390x/kvm/pv.c +@@ -29,7 +29,8 @@ static bool info_valid; + static struct kvm_s390_pv_info_vm info_vm; + static struct kvm_s390_pv_info_dump info_dump; + +-static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) ++static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data, ++ int *pvrc) + { + struct kvm_pv_cmd pv_cmd = { + .cmd = cmd, +@@ -46,6 +47,9 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + "IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc, + rc); + } ++ if (pvrc) { ++ *pvrc = pv_cmd.rc; ++ } + return rc; + } + +@@ -53,12 +57,13 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + * This macro lets us pass the command as a string to the function so + * we can print it on an error. 
+ */ +-#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data) ++#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data, NULL) ++#define s390_pv_cmd_pvrc(cmd, data, pvrc) __s390_pv_cmd(cmd, #cmd, data, pvrc) + #define s390_pv_cmd_exit(cmd, data) \ + { \ + int rc; \ + \ +- rc = __s390_pv_cmd(cmd, #cmd, data);\ ++ rc = __s390_pv_cmd(cmd, #cmd, data, NULL); \ + if (rc) { \ + exit(1); \ + } \ +@@ -142,14 +147,24 @@ bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) + return true; + } + +-int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) ++int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp) + { ++ int ret, pvrc; + struct kvm_s390_pv_sec_parm args = { + .origin = origin, + .length = length, + }; + +- return s390_pv_cmd(KVM_PV_SET_SEC_PARMS, &args); ++ ret = s390_pv_cmd_pvrc(KVM_PV_SET_SEC_PARMS, &args, &pvrc); ++ if (ret) { ++ error_setg(errp, "Failed to set secure execution parameters"); ++ if (pvrc == 0x108) { ++ error_append_hint(errp, "Please check whether the image is " ++ "correctly encrypted for this host\n"); ++ } ++ } ++ ++ return ret; + } + + /* +diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h +index 7b935e2246..5877d28ff1 100644 +--- a/target/s390x/kvm/pv.h ++++ b/target/s390x/kvm/pv.h +@@ -42,7 +42,7 @@ int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); + bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); +-int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); ++int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp); + int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); + void s390_pv_prep_reset(void); + int s390_pv_verify(void); +@@ -62,7 +62,8 @@ static inline int s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} + static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } 
+-static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } ++static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, ++ Error **errp) { return 0; } + static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } + static inline void s390_pv_prep_reset(void) {} + static inline int s390_pv_verify(void) { return 0; } +-- +2.39.3 + diff --git a/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch b/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch new file mode 100644 index 0000000..9b3eefb --- /dev/null +++ b/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch @@ -0,0 +1,125 @@ +From 420bf75353286324822c3bbca3b52a7a56ed668c Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:00 -0500 +Subject: [PATCH 083/101] tests: remove aio_context_acquire() tests + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [14/26] f6421037c1523bc957f3be0f4ad05571ae012dba (kmwolf/centos-qemu-kvm) + +The aio_context_acquire() API is being removed. Drop the test case that +calls the API. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231205182011.1976568-4-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + tests/unit/test-aio.c | 67 +------------------------------------------ + 1 file changed, 1 insertion(+), 66 deletions(-) + +diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c +index 337b6e4ea7..e77d86be87 100644 +--- a/tests/unit/test-aio.c ++++ b/tests/unit/test-aio.c +@@ -100,76 +100,12 @@ static void event_ready_cb(EventNotifier *e) + + /* Tests using aio_*. 
*/ + +-typedef struct { +- QemuMutex start_lock; +- EventNotifier notifier; +- bool thread_acquired; +-} AcquireTestData; +- +-static void *test_acquire_thread(void *opaque) +-{ +- AcquireTestData *data = opaque; +- +- /* Wait for other thread to let us start */ +- qemu_mutex_lock(&data->start_lock); +- qemu_mutex_unlock(&data->start_lock); +- +- /* event_notifier_set might be called either before or after +- * the main thread's call to poll(). The test case's outcome +- * should be the same in either case. +- */ +- event_notifier_set(&data->notifier); +- aio_context_acquire(ctx); +- aio_context_release(ctx); +- +- data->thread_acquired = true; /* success, we got here */ +- +- return NULL; +-} +- + static void set_event_notifier(AioContext *nctx, EventNotifier *notifier, + EventNotifierHandler *handler) + { + aio_set_event_notifier(nctx, notifier, handler, NULL, NULL); + } + +-static void dummy_notifier_read(EventNotifier *n) +-{ +- event_notifier_test_and_clear(n); +-} +- +-static void test_acquire(void) +-{ +- QemuThread thread; +- AcquireTestData data; +- +- /* Dummy event notifier ensures aio_poll() will block */ +- event_notifier_init(&data.notifier, false); +- set_event_notifier(ctx, &data.notifier, dummy_notifier_read); +- g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ +- +- qemu_mutex_init(&data.start_lock); +- qemu_mutex_lock(&data.start_lock); +- data.thread_acquired = false; +- +- qemu_thread_create(&thread, "test_acquire_thread", +- test_acquire_thread, +- &data, QEMU_THREAD_JOINABLE); +- +- /* Block in aio_poll(), let other thread kick us and acquire context */ +- aio_context_acquire(ctx); +- qemu_mutex_unlock(&data.start_lock); /* let the thread run */ +- g_assert(aio_poll(ctx, true)); +- g_assert(!data.thread_acquired); +- aio_context_release(ctx); +- +- qemu_thread_join(&thread); +- set_event_notifier(ctx, &data.notifier, NULL); +- event_notifier_cleanup(&data.notifier); +- +- g_assert(data.thread_acquired); +-} +- + static void 
test_bh_schedule(void) + { + BHTestData data = { .n = 0 }; +@@ -879,7 +815,7 @@ static void test_worker_thread_co_enter(void) + qemu_thread_get_self(&this_thread); + co = qemu_coroutine_create(co_check_current_thread, &this_thread); + +- qemu_thread_create(&worker_thread, "test_acquire_thread", ++ qemu_thread_create(&worker_thread, "test_aio_co_enter", + test_aio_co_enter, + co, QEMU_THREAD_JOINABLE); + +@@ -899,7 +835,6 @@ int main(int argc, char **argv) + while (g_main_context_iteration(NULL, false)); + + g_test_init(&argc, &argv, NULL); +- g_test_add_func("/aio/acquire", test_acquire); + g_test_add_func("/aio/bh/schedule", test_bh_schedule); + g_test_add_func("/aio/bh/schedule10", test_bh_schedule10); + g_test_add_func("/aio/bh/cancel", test_bh_cancel); +-- +2.39.3 + diff --git a/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch b/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch new file mode 100644 index 0000000..0afdea2 --- /dev/null +++ b/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch @@ -0,0 +1,46 @@ +From bbe64d706b3cb8b10ecd22bd71cf76b21eea257f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Jan 2024 17:58:03 +0100 +Subject: [PATCH 20/22] tests/unit: Bump test-replication timeout to 60 seconds + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [16/17] 200768aedee44d10aa8d199b92a9c17a9002fc3f (stefanha/centos-stream-qemu-kvm) + +We're seeing timeouts for this test on CI runs (specifically for +ubuntu-20.04-s390x-all). It doesn't fail consistently, but even the +successful runs take about 27 or 28 seconds, which is not very far from +the 30 seconds timeout. + +Bump the timeout a bit to make failure less likely even on this CI host. 
+ +Signed-off-by: Kevin Wolf +Message-ID: <20240125165803.48373-1-kwolf@redhat.com> +Reviewed-by: Thomas Huth +Signed-off-by: Kevin Wolf +(cherry picked from commit 63b18312d14ac984acaf13c7c55d9baa2d61496e) +Signed-off-by: Stefan Hajnoczi +--- + tests/unit/meson.build | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/unit/meson.build b/tests/unit/meson.build +index a05d471090..28db6adea8 100644 +--- a/tests/unit/meson.build ++++ b/tests/unit/meson.build +@@ -173,7 +173,8 @@ test_env.set('G_TEST_BUILDDIR', meson.current_build_dir()) + + slow_tests = { + 'test-crypto-tlscredsx509': 45, +- 'test-crypto-tlssession': 45 ++ 'test-crypto-tlssession': 45, ++ 'test-replication': 60, + } + + foreach test_name, extra: tests +-- +2.39.3 + diff --git a/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch b/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch deleted file mode 100644 index ef99b30..0000000 --- a/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch +++ /dev/null @@ -1,88 +0,0 @@ -From b998f8474846886fa1e0428fe79fe2a79231cc05 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 12 May 2023 15:43:38 +0100 -Subject: [PATCH 35/37] ui: Fix pixel colour channel order for PNG screenshots -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -RH-MergeRequest: 183: ui: Fix pixel colour channel order for PNG screenshots -RH-Bugzilla: 2222579 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 76acd3c5526639e70bc2998f584503c78fc9bc56 (marcandre.lureau-rh/qemu-kvm-centos) - -When we take a PNG screenshot the ordering of the colour channels in -the data is not correct, resulting in the image having weird -colouring compared to the actual display. (Specifically, on a -little-endian host the blue and red channels are swapped; on -big-endian everything is wrong.) 
- -This happens because the pixman idea of the pixel data and the libpng -idea differ. PIXMAN_a8r8g8b8 defines that pixels are 32-bit values, -with A in bits 24-31, R in bits 16-23, G in bits 8-15 and B in bits -0-7. This means that on little-endian systems the bytes in memory -are - B G R A -and on big-endian systems they are - A R G B - -libpng, on the other hand, thinks of pixels as being a series of -values for each channel, so its format PNG_COLOR_TYPE_RGB_ALPHA -always wants bytes in the order - R G B A - -This isn't the same as the pixman order for either big or little -endian hosts. - -The alpha channel is also unnecessary bulk in the output PNG file, -because there is no alpha information in a screenshot. - -To handle the endianness issue, we already define in ui/qemu-pixman.h -various PIXMAN_BE_* and PIXMAN_LE_* values that give consistent -byte-order pixel channel formats. So we can use PIXMAN_BE_r8g8b8 and -PNG_COLOR_TYPE_RGB, which both have an in-memory byte order of - R G B -and 3 bytes per pixel. - -(PPM format screenshots get this right; they already use the -PIXMAN_BE_r8g8b8 format.) 
- -Cc: qemu-stable@nongnu.org -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1622 -Fixes: 9a0a119a382867 ("Added parameter to take screenshot with screendump as PNG") -Signed-off-by: Peter Maydell -Reviewed-by: Marc-André Lureau -Message-id: 20230502135548.2451309-1-peter.maydell@linaro.org - -(cherry picked from commit cd22a0f520f471e3bd33bc19cf3b2fa772cdb2a8) -Signed-off-by: Marc-André Lureau ---- - ui/console.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/ui/console.c b/ui/console.c -index 6e8a3cdc62..e173731e20 100644 ---- a/ui/console.c -+++ b/ui/console.c -@@ -311,7 +311,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) - png_struct *png_ptr; - png_info *info_ptr; - g_autoptr(pixman_image_t) linebuf = -- qemu_pixman_linebuf_create(PIXMAN_a8r8g8b8, width); -+ qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width); - uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf); - FILE *f = fdopen(fd, "wb"); - int y; -@@ -341,7 +341,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) - png_init_io(png_ptr, f); - - png_set_IHDR(png_ptr, info_ptr, width, height, 8, -- PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, -+ PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, - PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); - - png_write_info(png_ptr, info_ptr); --- -2.39.3 - diff --git a/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch b/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch new file mode 100644 index 0000000..b51961c --- /dev/null +++ b/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch @@ -0,0 +1,81 @@ +From 6e4f68e9ba3fe75ca6f200f189f96bb402f0ee8e Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Wed, 24 Jan 2024 11:57:49 +0100 +Subject: [PATCH 02/20] ui/clipboard: add asserts for update and request +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 228: 
ui/clipboard: mark type as not available when there is no data +RH-Jira: RHEL-19629 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Gerd Hoffmann +RH-Commit: [2/2] 176b4b835fd8aa226f2fa93fd334b9384080cf21 (jmaloy/jmaloy-qemu-kvm-2) + +JIRA: https://issues.redhat.com/browse/RHEL-19629 +CVE: CVE-2023-6683 +Upstream: Merged + +ui/clipboard: add asserts for update and request + +commit 9c416582611b7495bdddb4c5456c7acb64b78938 +Author: Fiona Ebner +Date: Wed Jan 24 11:57:49 2024 +0100 + + ui/clipboard: add asserts for update and request + + Should an issue like CVE-2023-6683 ever appear again in the future, + it will be more obvious which assumption was violated. + + Suggested-by: Marc-André Lureau + Signed-off-by: Fiona Ebner + Reviewed-by: Marc-André Lureau + Message-ID: <20240124105749.204610-2-f.ebner@proxmox.com> + +Signed-off-by: Jon Maloy +--- + ui/clipboard.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index b3f6fa3c9e..4264884a6c 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -65,12 +65,24 @@ bool qemu_clipboard_check_serial(QemuClipboardInfo *info, bool client) + + void qemu_clipboard_update(QemuClipboardInfo *info) + { ++ uint32_t type; + QemuClipboardNotify notify = { + .type = QEMU_CLIPBOARD_UPDATE_INFO, + .info = info, + }; + assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); + ++ for (type = 0; type < QEMU_CLIPBOARD_TYPE__COUNT; type++) { ++ /* ++ * If data is missing, the clipboard owner's 'request' callback needs to ++ * be set. Otherwise, there is no way to get the clipboard data and ++ * qemu_clipboard_request() cannot be called. 
++ */ ++ if (info->types[type].available && !info->types[type].data) { ++ assert(info->owner && info->owner->request); ++ } ++ } ++ + notifier_list_notify(&clipboard_notifiers, &notify); + + if (cbinfo[info->selection] != info) { +@@ -132,6 +144,8 @@ void qemu_clipboard_request(QemuClipboardInfo *info, + !info->owner) + return; + ++ assert(info->owner->request); ++ + info->types[type].requested = true; + info->owner->request(info, type); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch b/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch new file mode 100644 index 0000000..00c9369 --- /dev/null +++ b/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch @@ -0,0 +1,107 @@ +From 097516bef2993d917e76d92066ca2eb067e45394 Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Wed, 24 Jan 2024 11:57:48 +0100 +Subject: [PATCH 01/20] ui/clipboard: mark type as not available when there is + no data +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 228: ui/clipboard: mark type as not available when there is no data +RH-Jira: RHEL-19629 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Gerd Hoffmann +RH-Commit: [1/2] 74ded03d6376f9693733d673502219f76eab7099 (jmaloy/jmaloy-qemu-kvm-2) + +JIRA: https://issues.redhat.com/browse/RHEL-19629 +CVE: CVE-2023-6683 +Upstream: Merged + +commit 405484b29f6548c7b86549b0f961b906337aa68a +Author: Fiona Ebner +Date: Wed Jan 24 11:57:48 2024 +0100 + + ui/clipboard: mark type as not available when there is no data + + With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT + message with len=0. In qemu_clipboard_set_data(), the clipboard info + will be updated setting data to NULL (because g_memdup(data, size) + returns NULL when size is 0). 
If the client does not set the + VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then + the 'request' callback for the clipboard peer is not initialized. + Later, because data is NULL, qemu_clipboard_request() can be reached + via vdagent_chr_write() and vdagent_clipboard_recv_request() and + there, the clipboard owner's 'request' callback will be attempted to + be called, but that is a NULL pointer. + + In particular, this can happen when using the KRDC (22.12.3) VNC + client. + + Another scenario leading to the same issue is with two clients (say + noVNC and KRDC): + + The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and + initializes its cbpeer. + + The KRDC client does not, but triggers a vnc_client_cut_text() (note + it's not the _ext variant)). There, a new clipboard info with it as + the 'owner' is created and via qemu_clipboard_set_data() is called, + which in turn calls qemu_clipboard_update() with that info. + + In qemu_clipboard_update(), the notifier for the noVNC client will be + called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the + noVNC client. The 'owner' in that clipboard info is the clipboard peer + for the KRDC client, which did not initialize the 'request' function. + That sounds correct to me, it is the owner of that clipboard info. + + Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set + the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it + passes), that clipboard info is passed to qemu_clipboard_request() and + the original segfault still happens. + + Fix the issue by handling updates with size 0 differently. In + particular, mark in the clipboard info that the type is not available. + + While at it, switch to g_memdup2(), because g_memdup() is deprecated. 
+ + Cc: qemu-stable@nongnu.org + Fixes: CVE-2023-6683 + Reported-by: Markus Frank + Suggested-by: Marc-André Lureau + Signed-off-by: Fiona Ebner + Reviewed-by: Marc-André Lureau + Tested-by: Markus Frank + Message-ID: <20240124105749.204610-1-f.ebner@proxmox.com> + +Signed-off-by: Jon Maloy +--- + ui/clipboard.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index 3d14bffaf8..b3f6fa3c9e 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer, + } + + g_free(info->types[type].data); +- info->types[type].data = g_memdup(data, size); +- info->types[type].size = size; +- info->types[type].available = true; ++ if (size) { ++ info->types[type].data = g_memdup2(data, size); ++ info->types[type].size = size; ++ info->types[type].available = true; ++ } else { ++ info->types[type].data = NULL; ++ info->types[type].size = 0; ++ info->types[type].available = false; ++ } + + if (update) { + qemu_clipboard_update(info); +-- +2.39.3 + diff --git a/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch b/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch deleted file mode 100644 index 8c468d8..0000000 --- a/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch +++ /dev/null @@ -1,180 +0,0 @@ -From c1502b0cd16378d6d5bd4259b90bf81a5fb5aad3 Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Fri, 5 May 2023 14:00:51 +0200 -Subject: [PATCH 20/21] util/async-teardown: wire up query-command-line-options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests -RH-Bugzilla: 2168500 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cédric Le Goater -RH-Commit: [1/2] 76e5f25df2c02721f5a29f552ee3061be589abb2 (thuth/qemu-kvm-cs9) - 
-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 - -Add new -run-with option with an async-teardown=on|off parameter. It is -visible in the output of query-command-line-options QMP command, so it -can be discovered and used by libvirt. - -The option -async-teardown is now redundant, deprecate it. - -Reported-by: Boris Fiuczynski -Fixes: c891c24b1a ("os-posix: asynchronous teardown for shutdown on Linux") -Signed-off-by: Claudio Imbrenda -Message-Id: <20230505120051.36605-2-imbrenda@linux.ibm.com> -[thuth: Add curly braces to fix error with GCC 8.5, fix bug in deprecated.rst] -Signed-off-by: Thomas Huth - -(cherry picked from commit 80bd81cadd127c1e2fc784612a52abe392670ba4) -Conflicts: - docs/about/deprecated.rst (missing context from other patches) -Signed-off-by: Thomas Huth ---- - docs/about/deprecated.rst | 5 +++++ - os-posix.c | 14 ++++++++++++++ - qemu-options.hx | 34 +++++++++++++++++++++++----------- - util/async-teardown.c | 21 +++++++++++++++++++++ - 4 files changed, 63 insertions(+), 11 deletions(-) - -diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst -index 1ca9dc33d6..52893fcf38 100644 ---- a/docs/about/deprecated.rst -+++ b/docs/about/deprecated.rst -@@ -111,6 +111,11 @@ Use ``-machine acpi=off`` instead. - The HAXM project has been retired (see https://github.com/intel/haxm#status). - Use "whpx" (on Windows) or "hvf" (on macOS) instead. - -+``-async-teardown`` (since 8.1) -+''''''''''''''''''''''''''''''' -+ -+Use ``-run-with async-teardown=on`` instead. 
-+ - - QEMU Machine Protocol (QMP) commands - ------------------------------------ -diff --git a/os-posix.c b/os-posix.c -index 5adc69f560..90ea71725f 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -36,6 +36,8 @@ - #include "qemu/log.h" - #include "sysemu/runstate.h" - #include "qemu/cutils.h" -+#include "qemu/config-file.h" -+#include "qemu/option.h" - - #ifdef CONFIG_LINUX - #include -@@ -152,9 +154,21 @@ int os_parse_cmd_args(int index, const char *optarg) - daemonize = 1; - break; - #if defined(CONFIG_LINUX) -+ /* deprecated */ - case QEMU_OPTION_asyncteardown: - init_async_teardown(); - break; -+ case QEMU_OPTION_run_with: { -+ QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), -+ optarg, false); -+ if (!opts) { -+ exit(1); -+ } -+ if (qemu_opt_get_bool(opts, "async-teardown", false)) { -+ init_async_teardown(); -+ } -+ break; -+ } - #endif - default: - return -1; -diff --git a/qemu-options.hx b/qemu-options.hx -index 52b49f1f6a..b18f933703 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -4766,20 +4766,32 @@ DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL) - DEF("async-teardown", 0, QEMU_OPTION_asyncteardown, - "-async-teardown enable asynchronous teardown\n", - QEMU_ARCH_ALL) --#endif - SRST - ``-async-teardown`` -- Enable asynchronous teardown. A new process called "cleanup/" -- will be created at startup sharing the address space with the main qemu -- process, using clone. It will wait for the main qemu process to -- terminate completely, and then exit. -- This allows qemu to terminate very quickly even if the guest was -- huge, leaving the teardown of the address space to the cleanup -- process. Since the cleanup process shares the same cgroups as the -- main qemu process, accounting is performed correctly. This only -- works if the cleanup process is not forcefully killed with SIGKILL -- before the main qemu process has terminated completely. -+ This option is deprecated and should no longer be used. 
The new option -+ ``-run-with async-teardown=on`` is a replacement. - ERST -+DEF("run-with", HAS_ARG, QEMU_OPTION_run_with, -+ "-run-with async-teardown[=on|off]\n" -+ " misc QEMU process lifecycle options\n" -+ " async-teardown=on enables asynchronous teardown\n", -+ QEMU_ARCH_ALL) -+SRST -+``-run-with`` -+ Set QEMU process lifecycle options. -+ -+ ``async-teardown=on`` enables asynchronous teardown. A new process called -+ "cleanup/" will be created at startup sharing the address -+ space with the main QEMU process, using clone. It will wait for the -+ main QEMU process to terminate completely, and then exit. This allows -+ QEMU to terminate very quickly even if the guest was huge, leaving the -+ teardown of the address space to the cleanup process. Since the cleanup -+ process shares the same cgroups as the main QEMU process, accounting is -+ performed correctly. This only works if the cleanup process is not -+ forcefully killed with SIGKILL before the main QEMU process has -+ terminated completely. 
-+ERST -+#endif - - DEF("msg", HAS_ARG, QEMU_OPTION_msg, - "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n" -diff --git a/util/async-teardown.c b/util/async-teardown.c -index 62cdeb0f20..3ab19c8740 100644 ---- a/util/async-teardown.c -+++ b/util/async-teardown.c -@@ -12,6 +12,9 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/config-file.h" -+#include "qemu/option.h" -+#include "qemu/module.h" - #include - #include - #include -@@ -144,3 +147,21 @@ void init_async_teardown(void) - clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); - sigprocmask(SIG_SETMASK, &old_signals, NULL); - } -+ -+static QemuOptsList qemu_run_with_opts = { -+ .name = "run-with", -+ .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), -+ .desc = { -+ { -+ .name = "async-teardown", -+ .type = QEMU_OPT_BOOL, -+ }, -+ { /* end of list */ } -+ }, -+}; -+ -+static void register_teardown(void) -+{ -+ qemu_add_opts(&qemu_run_with_opts); -+} -+opts_init(register_teardown); --- -2.39.3 - diff --git a/SOURCES/kvm-util-char_dev-Add-open_cdev.patch b/SOURCES/kvm-util-char_dev-Add-open_cdev.patch new file mode 100644 index 0000000..1f1e870 --- /dev/null +++ b/SOURCES/kvm-util-char_dev-Add-open_cdev.patch @@ -0,0 +1,175 @@ +From de167878ec4ca159cc6def5134c91c5fe9b5ab96 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Tue, 21 Nov 2023 16:44:01 +0800 +Subject: [PATCH 022/101] util/char_dev: Add open_cdev() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [21/67] 72bf9ec3ccc9959626235bd270ec84caa4cee435 (eauger1/centos-qemu-kvm) + +/dev/vfio/devices/vfioX may not exist. In that case it is still possible +to open /dev/char/$major:$minor instead. Add helper function to abstract +the cdev open. 
+ +Suggested-by: Jason Gunthorpe +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit d6b5c4c1b516a8176b74ec35a0af8cf89b04b6c1) +Signed-off-by: Eric Auger +--- + MAINTAINERS | 2 + + include/qemu/chardev_open.h | 16 ++++++++ + util/chardev_open.c | 81 +++++++++++++++++++++++++++++++++++++ + util/meson.build | 1 + + 4 files changed, 100 insertions(+) + create mode 100644 include/qemu/chardev_open.h + create mode 100644 util/chardev_open.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index a5a446914a..ca70bb4e64 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan + S: Supported + F: backends/iommufd.c + F: include/sysemu/iommufd.h ++F: include/qemu/chardev_open.h ++F: util/chardev_open.c + + vhost + M: Michael S. Tsirkin +diff --git a/include/qemu/chardev_open.h b/include/qemu/chardev_open.h +new file mode 100644 +index 0000000000..64e8fcfdcb +--- /dev/null ++++ b/include/qemu/chardev_open.h +@@ -0,0 +1,16 @@ ++/* ++ * QEMU Chardev Helper ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * ++ * Authors: Yi Liu ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. ++ */ ++ ++#ifndef QEMU_CHARDEV_OPEN_H ++#define QEMU_CHARDEV_OPEN_H ++ ++int open_cdev(const char *devpath, dev_t cdev); ++#endif +diff --git a/util/chardev_open.c b/util/chardev_open.c +new file mode 100644 +index 0000000000..f776429788 +--- /dev/null ++++ b/util/chardev_open.c +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (c) 2019, Mellanox Technologies. All rights reserved. ++ * Copyright (C) 2023 Intel Corporation. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. 
You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ * Authors: Yi Liu ++ * ++ * Copied from ++ * https://github.com/linux-rdma/rdma-core/blob/master/util/open_cdev.c ++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/chardev_open.h" ++ ++static int open_cdev_internal(const char *path, dev_t cdev) ++{ ++ struct stat st; ++ int fd; ++ ++ fd = qemu_open_old(path, O_RDWR); ++ if (fd == -1) { ++ return -1; ++ } ++ if (fstat(fd, &st) || !S_ISCHR(st.st_mode) || ++ (cdev != 0 && st.st_rdev != cdev)) { ++ close(fd); ++ return -1; ++ } ++ return fd; ++} ++ ++static int open_cdev_robust(dev_t cdev) ++{ ++ g_autofree char *devpath = NULL; ++ ++ /* ++ * This assumes that udev is being used and is creating the /dev/char/ ++ * symlinks. 
++ */ ++ devpath = g_strdup_printf("/dev/char/%u:%u", major(cdev), minor(cdev)); ++ return open_cdev_internal(devpath, cdev); ++} ++ ++int open_cdev(const char *devpath, dev_t cdev) ++{ ++ int fd; ++ ++ fd = open_cdev_internal(devpath, cdev); ++ if (fd == -1 && cdev != 0) { ++ return open_cdev_robust(cdev); ++ } ++ return fd; ++} +diff --git a/util/meson.build b/util/meson.build +index c2322ef6e7..174c133368 100644 +--- a/util/meson.build ++++ b/util/meson.build +@@ -108,6 +108,7 @@ if have_block + util_ss.add(files('filemonitor-stub.c')) + endif + util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c')) ++ util_ss.add(when: 'CONFIG_LINUX', if_true: files('chardev_open.c')) + endif + + if cpu == 'aarch64' +-- +2.39.3 + diff --git a/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch b/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch deleted file mode 100644 index fe68d18..0000000 --- a/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 64652225695c23855cfb1252cea2b55c24da2260 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:15 +0200 -Subject: [PATCH 1/9] util/iov: Make qiov_slice() public - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/5] 9c3cd661f7139ce124ee4f4d5fcbeaf3dbb9c45c (hreitz/qemu-kvm-c-9-s) - -We want to inline qemu_iovec_init_extended() in block/io.c for padding -requests, and having access to qiov_slice() is useful for this. As a -public function, it is renamed to qemu_iovec_slice(). - -(We will need to count the number of I/O vector elements of a slice -there, and then later process this slice. Without qiov_slice(), we -would need to call qemu_iovec_subvec_niov(), and all further -IOV-processing functions may need to skip prefixing elements to -accomodate for a qiov_offset. 
Because qemu_iovec_subvec_niov() -internally calls qiov_slice(), we can just have the block/io.c code call -qiov_slice() itself, thus get the number of elements, and also create an -iovec array with the superfluous prefixing elements stripped, so the -following processing functions no longer need to skip them.) - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-2-hreitz@redhat.com> -(cherry picked from commit 3d06cea8256d54a6b0238934c31012f7f17100f5) -Signed-off-by: Hanna Czenczek ---- - include/qemu/iov.h | 3 +++ - util/iov.c | 14 +++++++------- - 2 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/include/qemu/iov.h b/include/qemu/iov.h -index 9330746680..46fadfb27a 100644 ---- a/include/qemu/iov.h -+++ b/include/qemu/iov.h -@@ -229,6 +229,9 @@ int qemu_iovec_init_extended( - void *tail_buf, size_t tail_len); - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len); -+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -+ size_t offset, size_t len, -+ size_t *head, size_t *tail, int *niov); - int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); - void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); - void qemu_iovec_concat(QEMUIOVector *dst, -diff --git a/util/iov.c b/util/iov.c -index b4be580022..65a70449da 100644 ---- a/util/iov.c -+++ b/util/iov.c -@@ -378,15 +378,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, - } - - /* -- * qiov_slice -+ * qemu_iovec_slice - * - * Find subarray of iovec's, containing requested range. @head would - * be offset in first iov (returned by the function), @tail would be - * count of extra bytes in last iovec (returned iov + @niov - 1). 
- */ --static struct iovec *qiov_slice(QEMUIOVector *qiov, -- size_t offset, size_t len, -- size_t *head, size_t *tail, int *niov) -+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -+ size_t offset, size_t len, -+ size_t *head, size_t *tail, int *niov) - { - struct iovec *iov, *end_iov; - -@@ -411,7 +411,7 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) - size_t head, tail; - int niov; - -- qiov_slice(qiov, offset, len, &head, &tail, &niov); -+ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov); - - return niov; - } -@@ -439,8 +439,8 @@ int qemu_iovec_init_extended( - } - - if (mid_len) { -- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, -- &mid_head, &mid_tail, &mid_niov); -+ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, -+ &mid_head, &mid_tail, &mid_niov); - } - - total_niov = !!head_len + mid_niov + !!tail_len; --- -2.39.3 - diff --git a/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch b/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch deleted file mode 100644 index fd21880..0000000 --- a/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 8ff973985a04fec1a3cdf886976a03e0dca7b0ea Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:17 +0200 -Subject: [PATCH 3/9] util/iov: Remove qemu_iovec_init_extended() - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/5] 1740d7b15ea4fbfbe71e7adc122741e85e83fb8c (hreitz/qemu-kvm-c-9-s) - -bdrv_pad_request() was the main user of qemu_iovec_init_extended(). -HEAD^ has removed that use, so we can remove qemu_iovec_init_extended() -now. - -The only remaining user is qemu_iovec_init_slice(), which can easily -inline the small part it really needs. - -Note that qemu_iovec_init_extended() offered a memcpy() optimization to -initialize the new I/O vector. 
qemu_iovec_concat_iov(), which is used -to replace its functionality, does not, but calls qemu_iovec_add() for -every single element. If we decide this optimization was important, we -will need to re-implement it in qemu_iovec_concat_iov(), which might -also benefit its pre-existing users. - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-4-hreitz@redhat.com> -(cherry picked from commit cc63f6f6fa1aaa4b6405dd69432c693e9c8d18ca) -Signed-off-by: Hanna Czenczek ---- - include/qemu/iov.h | 5 --- - util/iov.c | 79 +++++++--------------------------------------- - 2 files changed, 11 insertions(+), 73 deletions(-) - -diff --git a/include/qemu/iov.h b/include/qemu/iov.h -index 46fadfb27a..63a1c01965 100644 ---- a/include/qemu/iov.h -+++ b/include/qemu/iov.h -@@ -222,11 +222,6 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) - - void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); - void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); --int qemu_iovec_init_extended( -- QEMUIOVector *qiov, -- void *head_buf, size_t head_len, -- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, -- void *tail_buf, size_t tail_len); - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len); - struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -diff --git a/util/iov.c b/util/iov.c -index 65a70449da..866fb577f3 100644 ---- a/util/iov.c -+++ b/util/iov.c -@@ -416,70 +416,6 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) - return niov; - } - --/* -- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, -- * and @tail_buf buffer into new qiov. 
-- */ --int qemu_iovec_init_extended( -- QEMUIOVector *qiov, -- void *head_buf, size_t head_len, -- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, -- void *tail_buf, size_t tail_len) --{ -- size_t mid_head, mid_tail; -- int total_niov, mid_niov = 0; -- struct iovec *p, *mid_iov = NULL; -- -- assert(mid_qiov->niov <= IOV_MAX); -- -- if (SIZE_MAX - head_len < mid_len || -- SIZE_MAX - head_len - mid_len < tail_len) -- { -- return -EINVAL; -- } -- -- if (mid_len) { -- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, -- &mid_head, &mid_tail, &mid_niov); -- } -- -- total_niov = !!head_len + mid_niov + !!tail_len; -- if (total_niov > IOV_MAX) { -- return -EINVAL; -- } -- -- if (total_niov == 1) { -- qemu_iovec_init_buf(qiov, NULL, 0); -- p = &qiov->local_iov; -- } else { -- qiov->niov = qiov->nalloc = total_niov; -- qiov->size = head_len + mid_len + tail_len; -- p = qiov->iov = g_new(struct iovec, qiov->niov); -- } -- -- if (head_len) { -- p->iov_base = head_buf; -- p->iov_len = head_len; -- p++; -- } -- -- assert(!mid_niov == !mid_len); -- if (mid_niov) { -- memcpy(p, mid_iov, mid_niov * sizeof(*p)); -- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; -- p[0].iov_len -= mid_head; -- p[mid_niov - 1].iov_len -= mid_tail; -- p += mid_niov; -- } -- -- if (tail_len) { -- p->iov_base = tail_buf; -- p->iov_len = tail_len; -- } -- -- return 0; --} -- - /* - * Check if the contents of subrange of qiov data is all zeroes. 
- */ -@@ -511,14 +447,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len) - { -- int ret; -+ struct iovec *slice_iov; -+ int slice_niov; -+ size_t slice_head, slice_tail; - - assert(source->size >= len); - assert(source->size - len >= offset); - -- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */ -- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); -- assert(ret == 0); -+ slice_iov = qemu_iovec_slice(source, offset, len, -+ &slice_head, &slice_tail, &slice_niov); -+ if (slice_niov == 1) { -+ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len); -+ } else { -+ qemu_iovec_init(qiov, slice_niov); -+ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len); -+ } - } - - void qemu_iovec_destroy(QEMUIOVector *qiov) --- -2.39.3 - diff --git a/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch b/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch deleted file mode 100644 index b0e66f6..0000000 --- a/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 439a8cdd010dfd253fc2277ae4ec605b5ba621d9 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:36 -0400 -Subject: [PATCH 02/56] util/mmap-alloc: qemu_fd_getfs() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/50] 8970b5ae611a933d693e0c90cbf4eda073635494 (peterx/qemu-kvm) - -This new helper fetches file system type for a fd. Only Linux is -implemented so far. Currently only tmpfs and hugetlbfs are defined, -but it can grow as needed. 
- -Signed-off-by: Peter Xu -Reviewed-by: David Hildenbrand -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit fa45f8dab9613993c042176ea2d25552bfebc955) -Signed-off-by: Peter Xu ---- - include/qemu/mmap-alloc.h | 7 +++++++ - util/mmap-alloc.c | 28 ++++++++++++++++++++++++++++ - 2 files changed, 35 insertions(+) - -diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h -index 2825e231a7..8344daaa03 100644 ---- a/include/qemu/mmap-alloc.h -+++ b/include/qemu/mmap-alloc.h -@@ -1,8 +1,15 @@ - #ifndef QEMU_MMAP_ALLOC_H - #define QEMU_MMAP_ALLOC_H - -+typedef enum { -+ QEMU_FS_TYPE_UNKNOWN = 0, -+ QEMU_FS_TYPE_TMPFS, -+ QEMU_FS_TYPE_HUGETLBFS, -+ QEMU_FS_TYPE_NUM, -+} QemuFsType; - - size_t qemu_fd_getpagesize(int fd); -+QemuFsType qemu_fd_getfs(int fd); - - /** - * qemu_ram_mmap: mmap anonymous memory, the specified file or device. -diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c -index 5ed7d29183..ed14f9c64d 100644 ---- a/util/mmap-alloc.c -+++ b/util/mmap-alloc.c -@@ -27,8 +27,36 @@ - - #ifdef CONFIG_LINUX - #include -+#include - #endif - -+QemuFsType qemu_fd_getfs(int fd) -+{ -+#ifdef CONFIG_LINUX -+ struct statfs fs; -+ int ret; -+ -+ if (fd < 0) { -+ return QEMU_FS_TYPE_UNKNOWN; -+ } -+ -+ do { -+ ret = fstatfs(fd, &fs); -+ } while (ret != 0 && errno == EINTR); -+ -+ switch (fs.f_type) { -+ case TMPFS_MAGIC: -+ return QEMU_FS_TYPE_TMPFS; -+ case HUGETLBFS_MAGIC: -+ return QEMU_FS_TYPE_HUGETLBFS; -+ default: -+ return QEMU_FS_TYPE_UNKNOWN; -+ } -+#else -+ return QEMU_FS_TYPE_UNKNOWN; -+#endif -+} -+ - size_t qemu_fd_getpagesize(int fd) - { - #ifdef CONFIG_LINUX --- -2.39.1 - diff --git a/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch b/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch deleted file mode 100644 index 4e492d9..0000000 --- a/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch +++ /dev/null @@ -1,82 +0,0 @@ -From fb2d40cc84f689e46138a81c57ccd1f234dbbb7c Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 07/37] util/vfio-helpers: Use g_file_read_link() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/28] 3545a07c967782dba8dd081415232f91d3f600a9 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit dbdea0dbfe2c -Author: Akihiko Odaki -Date: Tue May 23 11:39:12 2023 +0900 - - util/vfio-helpers: Use g_file_read_link() - - When _FORTIFY_SOURCE=2, glibc version is 2.35, and GCC version is - 12.1.0, the compiler complains as follows: - - In file included from /usr/include/features.h:490, - from /usr/include/bits/libc-header-start.h:33, - from /usr/include/stdint.h:26, - from /usr/lib/gcc/aarch64-unknown-linux-gnu/12.1.0/include/stdint.h:9, - from /home/alarm/q/var/qemu/include/qemu/osdep.h:94, - from ../util/vfio-helpers.c:13: - In function 'readlink', - inlined from 'sysfs_find_group_file' at ../util/vfio-helpers.c:116:9, - inlined from 'qemu_vfio_init_pci' at ../util/vfio-helpers.c:326:18, - inlined from 'qemu_vfio_open_pci' at ../util/vfio-helpers.c:517:9: - /usr/include/bits/unistd.h:119:10: error: argument 2 is null but the corresponding size argument 3 value is 4095 [-Werror=nonnull] - 119 | return __glibc_fortify (readlink, __len, sizeof (char), - | ^~~~~~~~~~~~~~~ - - This error implies the allocated buffer can be NULL. Use - g_file_read_link(), which allocates buffer automatically to avoid the - error. 
- - Signed-off-by: Akihiko Odaki - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - util/vfio-helpers.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c -index 2d8af38f88..f8bab46c68 100644 ---- a/util/vfio-helpers.c -+++ b/util/vfio-helpers.c -@@ -106,15 +106,17 @@ struct QEMUVFIOState { - */ - static char *sysfs_find_group_file(const char *device, Error **errp) - { -+ g_autoptr(GError) gerr = NULL; - char *sysfs_link; - char *sysfs_group; - char *p; - char *path = NULL; - - sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device); -- sysfs_group = g_malloc0(PATH_MAX); -- if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) { -- error_setg_errno(errp, errno, "Failed to find iommu group sysfs path"); -+ sysfs_group = g_file_read_link(sysfs_link, &gerr); -+ if (gerr) { -+ error_setg(errp, "Failed to find iommu group sysfs path: %s", -+ gerr->message); - goto out; - } - p = strrchr(sysfs_group, '/'); --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch b/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch deleted file mode 100644 index 56b9aed..0000000 --- a/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 74c2f378bdf278a03c02ae48948b00b4431a3fd6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 2 Jun 2023 16:38:52 +0200 -Subject: [PATCH 6/9] vdpa: do not block migration if device has cvq and - x-svq=on -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 190: vdpa: do not block migration if device has cvq and x-svq=on -RH-Jira: RHEL-573 -RH-Acked-by: Jason Wang -RH-Acked-by: Laurent Vivier -RH-Commit: [1/1] 
b0e2ec3c9e5c17252cf6a043fe1374ddc3c37de7 (eperezmartin/qemu-kvm) - -It was a mistake to forbid in all cases, as SVQ is already able to send -all the CVQ messages before start forwarding data vqs. It actually -caused a regression, making impossible to migrate device previously -migratable. - -Fixes: 36e4647247f2 ("vdpa: add vhost_vdpa_net_valid_svq_features") -Signed-off-by: Eugenio Pérez -Message-Id: <20230602143854.1879091-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Tested-by: Lei Yang -(cherry picked from commit 8bc0049eadafb984d305c847cedff550b58e5fc0) -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 8c8900f0f4..1ae839da34 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -844,13 +844,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.shadow_vq_ops_opaque = s; - - /* -- * TODO: We cannot migrate devices with CVQ as there is no way to set -- * the device state (MAC, MQ, etc) before starting the datapath. -+ * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -+ * there is no way to set the device state (MAC, MQ, etc) before -+ * starting the datapath. - * - * Migration blocker ownership now belongs to s->vhost_vdpa. 
- */ -- error_setg(&s->vhost_vdpa.migration_blocker, -- "net vdpa cannot migrate with CVQ feature"); -+ if (!svq) { -+ error_setg(&s->vhost_vdpa.migration_blocker, -+ "net vdpa cannot migrate with CVQ feature"); -+ } - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch b/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch deleted file mode 100644 index 1ab8f02..0000000 --- a/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 636eb63cbf23b31fc9880528490ac4bef680305b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 25 Jan 2023 08:47:34 +0100 -Subject: [PATCH 4/7] vdpa: export vhost_vdpa_set_vring_ready -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [4/7] 8d1fecec7a993b8b68e268e8783c200c158f5ee0 (eperezmartin/qemu-kvm) - -The vhost-vdpa net backend needs to enable vrings in a different order -than default, so export it. - -No functional change intended except for tracing, that now includes the -(virtio) index being enabled and the return value of the ioctl. - -Still ignoring return value of this function if called from -vhost_vdpa_dev_start, as reorganize calling code around it is out of -the scope of this series. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - hw/virtio/trace-events | 2 +- - hw/virtio/vhost-vdpa.c | 25 +++++++++++++------------ - include/hw/virtio/vhost-vdpa.h | 1 + - 3 files changed, 15 insertions(+), 13 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 300dec8d3e..85b43cd8fe 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -48,7 +48,7 @@ vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI - vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 - vhost_vdpa_reset_device(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 - vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" --vhost_vdpa_set_vring_ready(void *dev) "dev: %p" -+vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d" - vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" - vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 - vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index c04f14420d..e4d0101327 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -733,18 +733,17 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) - return idx; - } - --static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) -+int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) - { -- int i; -- trace_vhost_vdpa_set_vring_ready(dev); -- for (i = 0; i < dev->nvqs; ++i) { -- struct vhost_vring_state state = { -- .index = dev->vq_index + i, -- .num = 1, -- }; -- vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); -- } -- return 0; -+ struct vhost_dev *dev = v->dev; -+ struct vhost_vring_state state = { -+ .index = idx, -+ .num = 1, -+ }; -+ int r = 
vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); -+ -+ trace_vhost_vdpa_set_vring_ready(dev, idx, r); -+ return r; - } - - static int vhost_vdpa_set_config_call(struct vhost_dev *dev, -@@ -1155,7 +1154,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - if (unlikely(!ok)) { - return -1; - } -- vhost_vdpa_set_vring_ready(dev); -+ for (int i = 0; i < dev->nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(v, dev->vq_index + i); -+ } - } else { - vhost_vdpa_suspend(dev); - vhost_vdpa_svqs_stop(dev); -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index c278a2a8de..540642d304 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -55,6 +55,7 @@ typedef struct vhost_vdpa { - } VhostVDPA; - - int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); -+int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); - - int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, - hwaddr size, void *vaddr, bool readonly); --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch b/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch deleted file mode 100644 index a37612c..0000000 --- a/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch +++ /dev/null @@ -1,286 +0,0 @@ -From 1609e47511c9a02b26e0023ff6e1e999d7cdf179 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 26 May 2023 17:31:43 +0200 -Subject: [PATCH 2/7] vdpa: move CVQ isolation check to net_init_vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [2/7] caed8f81c3e30e6147817e7f43225aa3ee90ff37 (eperezmartin/qemu-kvm) - -Evaluating it at start time instead of initialization time may make 
the -guest capable of dynamically adding or removing migration blockers. - -Also, moving to initialization reduces the number of ioctls in the -migration, reducing failure possibilities. - -As a drawback we need to check for CVQ isolation twice: one time with no -MQ negotiated and another one acking it, as long as the device supports -it. This is because Vring ASID / group management is based on vq -indexes, but we don't know the index of CVQ before negotiating MQ. - -Signed-off-by: Eugenio Pérez -Message-Id: <20230526153143.470745-3-eperezma@redhat.com> -Tested-by: Lei Yang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 155 ++++++++++++++++++++++++++++++++++------------- - 1 file changed, 112 insertions(+), 43 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 801d4e0422..ce17e4416a 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -43,6 +43,10 @@ typedef struct VhostVDPAState { - - /* The device always have SVQ enabled */ - bool always_svq; -+ -+ /* The device can isolate CVQ in its own ASID */ -+ bool cvq_isolated; -+ - bool started; - } VhostVDPAState; - -@@ -369,15 +373,8 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - --/** -- * Get vring virtqueue group -- * -- * @device_fd vdpa device fd -- * @vq_index Virtqueue index -- * -- * Return -errno in case of error, or vq group if success. 
-- */ --static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) -+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, -+ Error **errp) - { - struct vhost_vring_state state = { - .index = vq_index, -@@ -386,8 +383,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - - if (unlikely(r < 0)) { - r = -errno; -- error_report("Cannot get VQ %u group: %s", vq_index, -- g_strerror(errno)); -+ error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); - return r; - } - -@@ -487,9 +483,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { - VhostVDPAState *s, *s0; - struct vhost_vdpa *v; -- uint64_t backend_features; - int64_t cvq_group; -- int cvq_index, r; -+ int r; -+ Error *err = NULL; - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - -@@ -509,41 +505,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - /* - * If we early return in these cases SVQ will not be enabled. The migration - * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. -- * -- * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev -- * yet. - */ -- r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -- if (unlikely(r < 0)) { -- error_report("Cannot get vdpa backend_features: %s(%d)", -- g_strerror(errno), errno); -- return -1; -+ if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ return 0; - } -- if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || -- !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ -+ if (!s->cvq_isolated) { - return 0; - } - -- /* -- * Check if all the virtqueues of the virtio device are in a different vq -- * than the last vq. VQ group of last group passed in cvq_group. 
-- */ -- cvq_index = v->dev->vq_index_end - 1; -- cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); -+ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, -+ v->dev->vq_index_end - 1, -+ &err); - if (unlikely(cvq_group < 0)) { -+ error_report_err(err); - return cvq_group; - } -- for (int i = 0; i < cvq_index; ++i) { -- int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); -- -- if (unlikely(group < 0)) { -- return group; -- } -- -- if (group == cvq_group) { -- return 0; -- } -- } - - r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); - if (unlikely(r < 0)) { -@@ -806,6 +783,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { - .avail_handler = vhost_vdpa_net_handle_ctrl_avail, - }; - -+/** -+ * Probe if CVQ is isolated -+ * -+ * @device_fd The vdpa device fd -+ * @features Features offered by the device. -+ * @cvq_index The control vq pair index -+ * -+ * Returns <0 in case of failure, 0 if false and 1 if true. -+ */ -+static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, -+ int cvq_index, Error **errp) -+{ -+ uint64_t backend_features; -+ int64_t cvq_group; -+ uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | -+ VIRTIO_CONFIG_S_DRIVER | -+ VIRTIO_CONFIG_S_FEATURES_OK; -+ int r; -+ -+ ERRP_GUARD(); -+ -+ r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -+ if (unlikely(r < 0)) { -+ error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); -+ return r; -+ } -+ -+ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { -+ return 0; -+ } -+ -+ r = ioctl(device_fd, VHOST_SET_FEATURES, &features); -+ if (unlikely(r)) { -+ error_setg_errno(errp, errno, "Cannot set features"); -+ } -+ -+ r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); -+ if (unlikely(r)) { -+ error_setg_errno(errp, -r, "Cannot set device features"); -+ goto out; -+ } -+ -+ cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); -+ if (unlikely(cvq_group < 0)) { -+ if 
(cvq_group != -ENOTSUP) { -+ r = cvq_group; -+ goto out; -+ } -+ -+ /* -+ * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend -+ * support ASID even if the parent driver does not. The CVQ cannot be -+ * isolated in this case. -+ */ -+ error_free(*errp); -+ *errp = NULL; -+ r = 0; -+ goto out; -+ } -+ -+ for (int i = 0; i < cvq_index; ++i) { -+ int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); -+ if (unlikely(group < 0)) { -+ r = group; -+ goto out; -+ } -+ -+ if (group == (int64_t)cvq_group) { -+ r = 0; -+ goto out; -+ } -+ } -+ -+ r = 1; -+ -+out: -+ status = 0; -+ ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); -+ return r; -+} -+ - static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - const char *device, - const char *name, -@@ -815,16 +873,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - bool is_datapath, - bool svq, - struct vhost_vdpa_iova_range iova_range, -- uint64_t features) -+ uint64_t features, -+ Error **errp) - { - NetClientState *nc = NULL; - VhostVDPAState *s; - int ret = 0; - assert(name); -+ int cvq_isolated; -+ - if (is_datapath) { - nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, - name); - } else { -+ cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, -+ queue_pair_index * 2, -+ errp); -+ if (unlikely(cvq_isolated < 0)) { -+ return NULL; -+ } -+ - nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, - device, name); - } -@@ -851,6 +919,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; -+ s->cvq_isolated = cvq_isolated; - - /* - * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -@@ -982,7 +1051,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true, 
opts->x_svq, -- iova_range, features); -+ iova_range, features, errp); - if (!ncs[i]) - goto err; - } -@@ -990,7 +1059,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false, -- opts->x_svq, iova_range, features); -+ opts->x_svq, iova_range, features, errp); - if (!nc) - goto err; - } --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch b/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch deleted file mode 100644 index 4ebd8bd..0000000 --- a/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 09bf0febef2512f00e71edca0fcbaf452652c2c7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 10 Aug 2023 11:27:28 +0200 -Subject: [PATCH 6/7] vdpa: move vhost_vdpa_set_vring_ready to the caller -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [6/7] cf4fd1071ca127914c8e8d6aefec451cad97ecc1 (eperezmartin/qemu-kvm) - -Doing that way allows CVQ to be enabled before the dataplane vqs, -restoring the state as MQ or MAC addresses properly in the case of a -migration. - -The patch does it by defining a ->load NetClientInfo callback also for -dataplane. Ideally, this should be done by an independent patch, but -the function is already static so it would only add an empty -vhost_vdpa_net_data_load stub. 
- -Signed-off-by: Eugenio Pérez ---- -v3: -* Fix subject typo -* Expand patch message so it explains why ---- - hw/virtio/vdpa-dev.c | 3 +++ - hw/virtio/vhost-vdpa.c | 3 --- - net/vhost-vdpa.c | 41 +++++++++++++++++++++++++++++++---------- - 3 files changed, 34 insertions(+), 13 deletions(-) - -diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c -index 01b41eb0f1..8c47d643bf 100644 ---- a/hw/virtio/vdpa-dev.c -+++ b/hw/virtio/vdpa-dev.c -@@ -256,6 +256,9 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) - error_setg_errno(errp, -ret, "Error starting vhost"); - goto err_guest_notifiers; - } -+ for (i = 0; i < s->dev.nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(&s->vdpa, i); -+ } - s->started = true; - - /* -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e4d0101327..0d9d311abd 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1154,9 +1154,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - if (unlikely(!ok)) { - return -1; - } -- for (int i = 0; i < dev->nvqs; ++i) { -- vhost_vdpa_set_vring_ready(v, dev->vq_index + i); -- } - } else { - vhost_vdpa_suspend(dev); - vhost_vdpa_svqs_stop(dev); -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index a1b16bbc52..47b87bf80d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -344,6 +344,22 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) - return 0; - } - -+static int vhost_vdpa_net_data_load(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_vdpa *v = &s->vhost_vdpa; -+ bool has_cvq = v->dev->vq_index_end % 2; -+ -+ if (has_cvq) { -+ return 0; -+ } -+ -+ for (int i = 0; i < v->dev->nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); -+ } -+ return 0; -+} -+ - static void vhost_vdpa_net_client_stop(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -366,6 +382,7 @@ static NetClientInfo net_vhost_vdpa_info = { - .size = 
sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_data_start, -+ .load = vhost_vdpa_net_data_load, - .stop = vhost_vdpa_net_client_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, -@@ -682,18 +699,22 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - -- if (!v->shadow_vqs_enabled) { -- return 0; -- } -+ vhost_vdpa_set_vring_ready(v, v->dev->vq_index); - -- n = VIRTIO_NET(v->dev->vdev); -- r = vhost_vdpa_net_load_mac(s, n); -- if (unlikely(r < 0)) { -- return r; -+ if (v->shadow_vqs_enabled) { -+ n = VIRTIO_NET(v->dev->vdev); -+ r = vhost_vdpa_net_load_mac(s, n); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ r = vhost_vdpa_net_load_mq(s, n); -+ if (unlikely(r)) { -+ return r; -+ } - } -- r = vhost_vdpa_net_load_mq(s, n); -- if (unlikely(r)) { -- return r; -+ -+ for (int i = 0; i < v->dev->vq_index; ++i) { -+ vhost_vdpa_set_vring_ready(v, i); - } - - return 0; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch b/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch deleted file mode 100644 index 9388d75..0000000 --- a/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 46d5b861a39b7d0d3222162e6b7707526c131230 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 24 Mar 2023 13:28:15 +0100 -Subject: [PATCH 7/7] vdpa: remove net cvq migration blocker -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [7/7] 9542e305c7ea3a47e0f1fe0629281238b0bb2111 (eperezmartin/qemu-kvm) - -Now that we have add migration blockers if the device does not support -all the needed features, remove the general blocker applied to all net -devices with CVQ. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 12 ------------ - 1 file changed, 12 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 47b87bf80d..6e03db4afa 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -941,18 +941,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; - s->cvq_isolated = cvq_isolated; -- -- /* -- * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -- * there is no way to set the device state (MAC, MQ, etc) before -- * starting the datapath. -- * -- * Migration blocker ownership now belongs to s->vhost_vdpa. -- */ -- if (!svq) { -- error_setg(&s->vhost_vdpa.migration_blocker, -- "net vdpa cannot migrate with CVQ feature"); -- } - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch b/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch deleted file mode 100644 index 15dc410..0000000 --- a/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch +++ /dev/null @@ -1,49 +0,0 @@ -From db7ca7692e264e8bf1bd9e08e3de7a92fc76a363 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 9 Aug 2023 18:07:26 +0200 -Subject: [PATCH 5/7] vdpa: rename vhost_vdpa_net_load to - vhost_vdpa_net_cvq_load -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [5/7] aea91f3274786665725af892eb905818eb0f44f1 (eperezmartin/qemu-kvm) - -Next patches will add the corresponding data load. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 29d3fd3ca6..a1b16bbc52 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -673,7 +673,7 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, - return *s->status != VIRTIO_NET_OK; - } - --static int vhost_vdpa_net_load(NetClientState *nc) -+static int vhost_vdpa_net_cvq_load(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_vdpa *v = &s->vhost_vdpa; -@@ -704,7 +704,7 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_cvq_start, -- .load = vhost_vdpa_net_load, -+ .load = vhost_vdpa_net_cvq_load, - .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch b/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch deleted file mode 100644 index c8b4913..0000000 --- a/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 09583f39d51d16079c9fda32545d7a44b6f5c8c6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 26 May 2023 17:31:42 +0200 -Subject: [PATCH 1/7] vdpa: return errno in vhost_vdpa_get_vring_group error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [1/7] 89745b1828a1af535c40657022d385250688d11d (eperezmartin/qemu-kvm) - -We need to tell in the caller, as some errors are expected in a normal -workflow. 
In particular, parent drivers in recent kernels with -VHOST_BACKEND_F_IOTLB_ASID may not support vring groups. In that case, --ENOTSUP is returned. - -This is the case of vp_vdpa in Linux 6.2. - -Next patches in this series will use that information to know if it must -abort or not. Also, next patches return properly an errp instead of -printing with error_report. - -Reviewed-by: Stefano Garzarella -Acked-by: Jason Wang -Signed-off-by: Eugenio Pérez -Message-Id: <20230526153143.470745-2-eperezma@redhat.com> -Tested-by: Lei Yang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - net/vhost-vdpa.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1ae839da34..801d4e0422 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -369,6 +369,14 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - -+/** -+ * Get vring virtqueue group -+ * -+ * @device_fd vdpa device fd -+ * @vq_index Virtqueue index -+ * -+ * Return -errno in case of error, or vq group if success. 
-+ */ - static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - { - struct vhost_vring_state state = { -@@ -377,6 +385,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); - - if (unlikely(r < 0)) { -+ r = -errno; - error_report("Cannot get VQ %u group: %s", vq_index, - g_strerror(errno)); - return r; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch b/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch deleted file mode 100644 index bfb1b8e..0000000 --- a/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 726662aee0bc295f6931b7aba1bd68f033e949aa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 10 Aug 2023 16:08:18 +0200 -Subject: [PATCH 3/7] vdpa: use first queue SVQ state for CVQ default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [3/7] 5c98f11b5080552a62c8e37ff2c23339455b7b86 (eperezmartin/qemu-kvm) - -Previous to this patch the only way CVQ would be shadowed is if it does -support to isolate CVQ group or if all vqs were shadowed from the -beginning. The second condition was checked at the beginning, and no -more configuration was done. - -After this series we need to check if data queues are shadowed because -they are in the middle of the migration. As checking if they are -shadowed already covers the previous case, let's just mimic it. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index ce17e4416a..29d3fd3ca6 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -494,7 +494,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - - s0 = vhost_vdpa_net_first_nc_vdpa(s); - v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; -- v->shadow_vqs_enabled = s->always_svq; -+ v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; - s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; - - if (s->vhost_vdpa.shadow_data) { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch b/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch deleted file mode 100644 index 1e00427..0000000 --- a/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 97124d4f2afbc8e65a3ecf76096e6b34a9b71541 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 30/37] vfio: Fix null pointer dereference bug in - vfio_bars_finalize() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [28/28] 4bbdf7f9c5595897244c6cc3d88d487dd5f99bf0 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8af87a3ec7e4 -Author: Avihai Horon -Date: Tue Jul 4 16:39:27 2023 +0300 - - vfio: Fix null pointer dereference bug in vfio_bars_finalize() - - vfio_realize() has the following flow: - 1. vfio_bars_prepare() -- sets VFIOBAR->size. - 2. msix_early_setup(). - 3. vfio_bars_register() -- allocates VFIOBAR->mr. - - After vfio_bars_prepare() is called msix_early_setup() can fail. 
If it - does fail, vfio_bars_register() is never called and VFIOBAR->mr is not - allocated. - - In this case, vfio_bars_finalize() is called as part of the error flow - to free the bars' resources. However, vfio_bars_finalize() calls - object_unparent() for VFIOBAR->mr after checking only VFIOBAR->size, and - thus we get a null pointer dereference. - - Fix it by checking VFIOBAR->mr in vfio_bars_finalize(). - - Fixes: 89d5202edc50 ("vfio/pci: Allow relocating MSI-X MMIO") - Signed-off-by: Avihai Horon - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index ba40ca8784..9189459a38 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -1755,9 +1755,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) - - vfio_bar_quirk_finalize(vdev, i); - vfio_region_finalize(&bar->region); -- if (bar->size) { -+ if (bar->mr) { -+ assert(bar->size); - object_unparent(OBJECT(bar->mr)); - g_free(bar->mr); -+ bar->mr = NULL; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch b/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch deleted file mode 100644 index 78a554d..0000000 --- a/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch +++ /dev/null @@ -1,196 +0,0 @@ -From f68e8c5d841cd7fc785cc3d15b3c280211bfb4c3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 17/37] vfio: Implement a common device info helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [15/28] 
9cfd233ab1b95dc7de776e8ef901823bd37c5a6b (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 634f38f0f73f -Author: Alex Williamson -Date: Thu Jun 1 08:45:06 2023 -0600 - - vfio: Implement a common device info helper - - A common helper implementing the realloc algorithm for handling - capabilities. - - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Signed-off-by: Alex Williamson - Reviewed-by: Robin Voetter - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-vfio.c | 37 ++++------------------------ - hw/vfio/common.c | 46 ++++++++++++++++++++++++++--------- - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 41 insertions(+), 43 deletions(-) - -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index f51190d466..59a2e03873 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -289,38 +289,11 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, - memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); - } - --static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, -- uint32_t argsz) -+static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) - { -- struct vfio_device_info *info = g_malloc0(argsz); -- VFIOPCIDevice *vfio_pci; -- int fd; -+ VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); - -- vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); -- fd = vfio_pci->vbasedev.fd; -- -- /* -- * If the specified argsz is not large enough to contain all capabilities -- * it will be updated upon return from the ioctl. Retry until we have -- * a big enough buffer to hold the entire capability chain. On error, -- * just exit and rely on CLP defaults. 
-- */ --retry: -- info->argsz = argsz; -- -- if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { -- trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); -- g_free(info); -- return NULL; -- } -- -- if (info->argsz > argsz) { -- argsz = info->argsz; -- info = g_realloc(info, argsz); -- goto retry; -- } -- -- return info; -+ return vfio_get_device_info(vfio_pci->vbasedev.fd); - } - - /* -@@ -335,7 +308,7 @@ bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) - - assert(fh); - -- info = get_device_info(pbdev, sizeof(*info)); -+ info = get_device_info(pbdev); - if (!info) { - return false; - } -@@ -356,7 +329,7 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) - { - g_autofree struct vfio_device_info *info = NULL; - -- info = get_device_info(pbdev, sizeof(*info)); -+ info = get_device_info(pbdev); - if (!info) { - return; - } -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index b73086e17a..3b4ac53f15 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -2845,11 +2845,35 @@ void vfio_put_group(VFIOGroup *group) - } - } - -+struct vfio_device_info *vfio_get_device_info(int fd) -+{ -+ struct vfio_device_info *info; -+ uint32_t argsz = sizeof(*info); -+ -+ info = g_malloc0(argsz); -+ -+retry: -+ info->argsz = argsz; -+ -+ if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { -+ g_free(info); -+ return NULL; -+ } -+ -+ if (info->argsz > argsz) { -+ argsz = info->argsz; -+ info = g_realloc(info, argsz); -+ goto retry; -+ } -+ -+ return info; -+} -+ - int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp) - { -- struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; -- int ret, fd; -+ g_autofree struct vfio_device_info *info = NULL; -+ int fd; - - fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); - if (fd < 0) { -@@ -2861,11 +2885,11 @@ int vfio_get_device(VFIOGroup *group, const char *name, - return fd; - } - -- ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info); -- if (ret) { -+ info = 
vfio_get_device_info(fd); -+ if (!info) { - error_setg_errno(errp, errno, "error getting device info"); - close(fd); -- return ret; -+ return -1; - } - - /* -@@ -2893,14 +2917,14 @@ int vfio_get_device(VFIOGroup *group, const char *name, - vbasedev->group = group; - QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - -- vbasedev->num_irqs = dev_info.num_irqs; -- vbasedev->num_regions = dev_info.num_regions; -- vbasedev->flags = dev_info.flags; -+ vbasedev->num_irqs = info->num_irqs; -+ vbasedev->num_regions = info->num_regions; -+ vbasedev->flags = info->flags; -+ -+ trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs); - -- trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions, -- dev_info.num_irqs); -+ vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); - -- vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - return 0; - } - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 3dc5f2104c..6d1b8487c3 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -216,6 +216,7 @@ void vfio_region_finalize(VFIORegion *region); - void vfio_reset_handler(void *opaque); - VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); - void vfio_put_group(VFIOGroup *group); -+struct vfio_device_info *vfio_get_device_info(int fd); - int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch b/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch new file mode 100644 index 0000000..040288f --- /dev/null +++ b/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch @@ -0,0 +1,154 @@ +From f554328f6f4702743af71befcb83c25c36e4fa4d Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:25 +0800 +Subject: [PATCH 046/101] vfio: Introduce a helper function 
to initialize + VFIODevice +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [45/67] 73225f394540bf5aeb70c0bdb89771f19a6d286d (eauger1/centos-qemu-kvm) + +Introduce a helper function to replace the common code to initialize +VFIODevice in pci, platform, ap and ccw VFIO device. + +No functional change intended. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 6106a329141af7d47bdc3346ce9820d4714e0e5d) +Signed-off-by: Eric Auger +--- + hw/vfio/ap.c | 8 ++------ + hw/vfio/ccw.c | 8 ++------ + hw/vfio/helpers.c | 11 +++++++++++ + hw/vfio/pci.c | 6 ++---- + hw/vfio/platform.c | 6 ++---- + include/hw/vfio/vfio-common.h | 2 ++ + 6 files changed, 21 insertions(+), 20 deletions(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index 95fe7cd98b..e157aa1ff7 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -226,18 +226,14 @@ static void vfio_ap_instance_init(Object *obj) + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); + VFIODevice *vbasedev = &vapdev->vdev; + +- vbasedev->type = VFIO_DEVICE_TYPE_AP; +- vbasedev->ops = &vfio_ap_ops; +- vbasedev->dev = DEVICE(vapdev); +- vbasedev->fd = -1; +- + /* + * vfio-ap devices operate in a way compatible with discarding of + * memory in RAM blocks, as no pages are pinned in the host. + * This needs to be set before vfio_get_device() for vfio common to + * handle ram_block_discard_disable(). 
+ */ +- vbasedev->ram_block_discard_allowed = true; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops, ++ DEVICE(vapdev), true); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 6305a4c1b8..90e4a53437 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -683,11 +683,6 @@ static void vfio_ccw_instance_init(Object *obj) + VFIOCCWDevice *vcdev = VFIO_CCW(obj); + VFIODevice *vbasedev = &vcdev->vdev; + +- vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->ops = &vfio_ccw_ops; +- vbasedev->dev = DEVICE(vcdev); +- vbasedev->fd = -1; +- + /* + * All vfio-ccw devices are believed to operate in a way compatible with + * discarding of memory in RAM blocks, ie. pages pinned in the host are +@@ -696,7 +691,8 @@ static void vfio_ccw_instance_init(Object *obj) + * needs to be set before vfio_get_device() for vfio common to handle + * ram_block_discard_disable(). + */ +- vbasedev->ram_block_discard_allowed = true; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops, ++ DEVICE(vcdev), true); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 3592c3d54e..6789870802 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -652,3 +652,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) + } + vbasedev->fd = fd; + } ++ ++void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, ++ DeviceState *dev, bool ram_discard) ++{ ++ vbasedev->type = type; ++ vbasedev->ops = ops; ++ vbasedev->dev = dev; ++ vbasedev->fd = -1; ++ ++ vbasedev->ram_block_discard_allowed = ram_discard; ++} +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 3f5900cc46..83c3238608 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3353,10 +3353,8 @@ static void vfio_instance_init(Object *obj) + vdev->host.slot = ~0U; + vdev->host.function = ~0U; + +- vbasedev->type = VFIO_DEVICE_TYPE_PCI; +- vbasedev->ops = &vfio_pci_ops; +- vbasedev->dev = DEVICE(vdev); +- vbasedev->fd 
= -1; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops, ++ DEVICE(vdev), false); + + vdev->nv_gpudirect_clique = 0xFF; + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 506eb8193f..a8d9b7da63 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -657,10 +657,8 @@ static void vfio_platform_instance_init(Object *obj) + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); + VFIODevice *vbasedev = &vdev->vbasedev; + +- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; +- vbasedev->ops = &vfio_platform_ops; +- vbasedev->dev = DEVICE(vdev); +- vbasedev->fd = -1; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops, ++ DEVICE(vdev), false); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index efcba19f66..b8aa8a5495 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -257,4 +257,6 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, + /* Returns 0 on success, or a negative errno. 
*/ + int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); + void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); ++void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, ++ DeviceState *dev, bool ram_discard); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch b/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch new file mode 100644 index 0000000..d41e8fb --- /dev/null +++ b/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch @@ -0,0 +1,129 @@ +From 7f392385d1b865904eae4b6681e3e7a87eb3af3d Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:27 +0800 +Subject: [PATCH 002/101] vfio: Introduce base object for VFIOContainer and + targeted interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [1/67] e63af50c2cb94f286b2d91f58c2d19dd862e019d (eauger1/centos-qemu-kvm) + +Introduce a dumb VFIOContainerBase object and its targeted interface. +This is willingly not a QOM object because we don't want it to be +visible from the user interface. The VFIOContainerBase will be +smoothly populated in subsequent patches as well as interfaces. + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit f61dddd73232e3d82d560d1e1bca120446021f2f) +Signed-off-by: Eric Auger +--- + include/hw/vfio/vfio-common.h | 8 ++--- + include/hw/vfio/vfio-container-base.h | 50 +++++++++++++++++++++++++++ + 2 files changed, 52 insertions(+), 6 deletions(-) + create mode 100644 include/hw/vfio/vfio-container-base.h + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index a4a22accb9..586d153c12 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -30,6 +30,7 @@ + #include <linux/vfio.h> + #endif + #include "sysemu/sysemu.h" ++#include "hw/vfio/vfio-container-base.h" + + #define VFIO_MSG_PREFIX "vfio %s: " + +@@ -81,6 +82,7 @@ typedef struct VFIOAddressSpace { + struct VFIOGroup; + + typedef struct VFIOContainer { ++ VFIOContainerBase bcontainer; + VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener listener; +@@ -201,12 +203,6 @@ typedef struct VFIODisplay { + } dmabuf; + } VFIODisplay; + +-typedef struct { +- unsigned long *bitmap; +- hwaddr size; +- hwaddr pages; +-} VFIOBitmap; +- + VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +new file mode 100644 +index 0000000000..1d6daaea5d +--- /dev/null ++++ b/include/hw/vfio/vfio-container-base.h +@@ -0,0 +1,50 @@ ++/* ++ * VFIO BASE CONTAINER ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H ++#define HW_VFIO_VFIO_CONTAINER_BASE_H ++ ++#include "exec/memory.h" ++ ++typedef struct VFIODevice VFIODevice; ++typedef struct VFIOIOMMUOps VFIOIOMMUOps; ++ ++typedef struct { ++ unsigned long *bitmap; ++ hwaddr size; ++ hwaddr pages; ++} VFIOBitmap; ++ ++/* ++ * This is the base object for vfio container backends ++ */ ++typedef struct VFIOContainerBase { ++ const VFIOIOMMUOps *ops; ++} VFIOContainerBase; ++ ++struct VFIOIOMMUOps { ++ /* basic feature */ ++ int (*dma_map)(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly); ++ int (*dma_unmap)(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb); ++ int (*attach_device)(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp); ++ void (*detach_device)(VFIODevice *vbasedev); ++ /* migration feature */ ++ int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); ++ int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size); ++}; ++#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch b/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch new file mode 100644 index 0000000..03fb220 --- /dev/null +++ b/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch @@ -0,0 +1,276 @@ +From 84b15fad1af781d06d0206d362de0801d7a18d0b Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:17 +0800 +Subject: [PATCH 038/101] vfio: Make VFIOContainerBase poiner parameter const + in VFIOIOMMUOps callbacks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 
RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [37/67] 95eb9edc7fcfefbd4b075f6f04941ed4a19ff87d (eauger1/centos-qemu-kvm) + +Some of the callbacks in VFIOIOMMUOps pass VFIOContainerBase poiner, +those callbacks only need read access to the sub object of VFIOContainerBase. +So make VFIOContainerBase, VFIOContainer and VFIOIOMMUFDContainer as const +in these callbacks. + +Local functions called by those callbacks also need same changes to avoid +build error. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 4517c33c31d392f08fa96a9db911da1e3507be94) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 9 +++---- + hw/vfio/container-base.c | 2 +- + hw/vfio/container.c | 34 ++++++++++++++------------- + hw/vfio/iommufd.c | 8 +++---- + include/hw/vfio/vfio-common.h | 12 ++++++---- + include/hw/vfio/vfio-container-base.h | 12 ++++++---- + 6 files changed, 42 insertions(+), 35 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 6569732b7a..08a3e57672 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -204,7 +204,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) + return true; + } + +-bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) ++bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer) + { + VFIODevice *vbasedev; + +@@ -221,7 +221,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. 
+ */ +-bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) ++bool ++vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer) + { + VFIODevice *vbasedev; + +@@ -1139,7 +1140,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, + return 0; + } + +-int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { +@@ -1162,7 +1163,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + return 0; + } + +-int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, ++int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr) + { + bool all_device_dirty_tracking = +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index eee2dcfe76..1ffd25bbfa 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -63,7 +63,7 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); + } + +-int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size) + { +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 1dbf9b9a17..b22feb8ded 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -61,11 +61,11 @@ static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) + } + } + +-static int vfio_dma_unmap_bitmap(VFIOContainer *container, ++static int vfio_dma_unmap_bitmap(const VFIOContainer *container, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ const VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_type1_dma_unmap 
*unmap; + struct vfio_bitmap *bitmap; + VFIOBitmap vbmap; +@@ -117,11 +117,12 @@ unmap_exit: + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +-static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb) ++static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, +@@ -174,11 +175,11 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + return 0; + } + +-static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ++static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, +@@ -207,11 +208,12 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, + return -errno; + } + +-static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, +- bool start) ++static int ++vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, ++ bool start) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), +@@ -233,12 +235,12 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + 
return ret; + } + +-static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; + int ret; +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 5accd26484..87a561c545 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -26,10 +26,10 @@ + #include "qemu/chardev_open.h" + #include "pci.h" + +-static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, ++static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) + { +- VFIOIOMMUFDContainer *container = ++ const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + return iommufd_backend_map_dma(container->be, +@@ -37,11 +37,11 @@ static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, + iova, size, vaddr, readonly); + } + +-static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, ++static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { +- VFIOIOMMUFDContainer *container = ++ const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 697bf24a35..efcba19f66 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -244,13 +244,15 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + int 
vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); +-bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); +-bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); +-int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++bool ++vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer); ++bool ++vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer); ++int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size); +-int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, +- uint64_t size, ram_addr_t ram_addr); ++int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, ++ uint64_t size, ram_addr_t ram_addr); + + /* Returns 0 on success, or a negative errno. */ + int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 45bb19c767..2ae297ccda 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -82,7 +82,7 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, + MemoryRegionSection *section); + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start); +-int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); + +@@ -93,18 +93,20 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + struct VFIOIOMMUOps { + /* basic feature */ +- int (*dma_map)(VFIOContainerBase *bcontainer, ++ int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +- int (*dma_unmap)(VFIOContainerBase *bcontainer, ++ int (*dma_unmap)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + 
IOMMUTLBEntry *iotlb); + int (*attach_device)(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp); + void (*detach_device)(VFIODevice *vbasedev); + /* migration feature */ +- int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); +- int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, ++ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, ++ bool start); ++ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); + /* PCI specific */ + int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch b/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch new file mode 100644 index 0000000..ffd8b9f --- /dev/null +++ b/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch @@ -0,0 +1,75 @@ +From 57bdfc821d6f4b4f9c6b1ff05bf0114e5cabc77e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:13 +0800 +Subject: [PATCH 034/101] vfio/ap: Allow the selection of a given iommu backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [33/67] a12bb86e5b627ccf246fb9ce60820595589ff8e5 (eauger1/centos-qemu-kvm) + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-ap device: + +if the user wants to use the legacy backend, it shall not +link the vfio-ap device with any iommufd object: + + -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX + +This is called the legacy mode/backend. 
+ +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-ap device options: + + -object iommufd,id=iommufd0 + -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 336f308958d598f3db351bb7d94cc57b4b2d448d) +Signed-off-by: Eric Auger +--- + hw/vfio/ap.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index bbf69ff55a..80629609ae 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -11,10 +11,12 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include <linux/vfio.h> + #include <sys/ioctl.h> + #include "qapi/error.h" + #include "hw/vfio/vfio-common.h" ++#include "sysemu/iommufd.h" + #include "hw/s390x/ap-device.h" + #include "qemu/error-report.h" + #include "qemu/event_notifier.h" +@@ -204,6 +206,10 @@ static void vfio_ap_unrealize(DeviceState *dev) + + static Property vfio_ap_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOAPDevice, vdev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch b/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch new file mode 100644 index 0000000..1055329 --- /dev/null +++ b/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch @@ -0,0 +1,87 @@ +From db09b7c60c01ee75d602261ee959a96fa0d89d68 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:14 +0800 +Subject: [PATCH 035/101] vfio/ap: Make vfio cdev pre-openable by passing a + file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit 
+ +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [34/67] aaafa6088a9b0302d53aa539f67792d02ea0f663 (eauger1/centos-qemu-kvm) + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 5e7ba401b71d18544a3e44b2a58b9e63fd5148d5) +Signed-off-by: Eric Auger +--- + hw/vfio/ap.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index 80629609ae..f180e4a32a 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -160,7 +160,10 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); + VFIODevice *vbasedev = &vapdev->vdev; + +- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); ++ if (vfio_device_get_name(vbasedev, errp) < 0) { ++ return; ++ } ++ + vbasedev->ops = &vfio_ap_ops; + vbasedev->type = VFIO_DEVICE_TYPE_AP; + vbasedev->dev = dev; +@@ -230,11 +233,28 @@ static const VMStateDescription vfio_ap_vmstate = { + .unmigratable = 1, + }; + ++static void vfio_ap_instance_init(Object *obj) ++{ ++ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); ++ ++ vapdev->vdev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_ap_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_AP_DEVICE(obj)->vdev, str, errp); ++} ++#endif ++ + static void vfio_ap_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, vfio_ap_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_ap_set_fd); ++#endif + dc->vmsd 
= &vfio_ap_vmstate; + dc->desc = "VFIO-based AP device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +@@ -249,6 +269,7 @@ static const TypeInfo vfio_ap_info = { + .name = TYPE_VFIO_AP_DEVICE, + .parent = TYPE_AP_DEVICE, + .instance_size = sizeof(VFIOAPDevice), ++ .instance_init = vfio_ap_instance_init, + .class_init = vfio_ap_class_init, + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch b/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch new file mode 100644 index 0000000..ed60920 --- /dev/null +++ b/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch @@ -0,0 +1,81 @@ +From b8630ecb698e31311089ba4e224d5e2c08c8e665 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:23 +0800 +Subject: [PATCH 044/101] vfio/ap: Move VFIODevice initializations in + vfio_ap_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [43/67] 95a527f649b28c5c78903e99735107667e8468b1 (eauger1/centos-qemu-kvm) + +Some of the VFIODevice initializations is in vfio_ap_realize, +move all of them in vfio_ap_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit cbbcc2f1706aa1a08637142744d2f5f6515ac93f) +Signed-off-by: Eric Auger +--- + hw/vfio/ap.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index f180e4a32a..95fe7cd98b 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -164,18 +164,6 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) + return; + } + +- vbasedev->ops = &vfio_ap_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_AP; +- vbasedev->dev = dev; +- +- /* +- * vfio-ap devices operate in a way compatible with discarding of +- * memory in RAM blocks, as no pages are pinned in the host. +- * This needs to be set before vfio_get_device() for vfio common to +- * handle ram_block_discard_disable(). +- */ +- vapdev->vdev.ram_block_discard_allowed = true; +- + ret = vfio_attach_device(vbasedev->name, vbasedev, + &address_space_memory, errp); + if (ret) { +@@ -236,8 +224,20 @@ static const VMStateDescription vfio_ap_vmstate = { + static void vfio_ap_instance_init(Object *obj) + { + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); ++ VFIODevice *vbasedev = &vapdev->vdev; + +- vapdev->vdev.fd = -1; ++ vbasedev->type = VFIO_DEVICE_TYPE_AP; ++ vbasedev->ops = &vfio_ap_ops; ++ vbasedev->dev = DEVICE(vapdev); ++ vbasedev->fd = -1; ++ ++ /* ++ * vfio-ap devices operate in a way compatible with discarding of ++ * memory in RAM blocks, as no pages are pinned in the host. ++ * This needs to be set before vfio_get_device() for vfio common to ++ * handle ram_block_discard_disable(). 
++ */ ++ vbasedev->ram_block_discard_allowed = true; + } + + #ifdef CONFIG_IOMMUFD +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch b/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch new file mode 100644 index 0000000..ff64a91 --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch @@ -0,0 +1,79 @@ +From 732115c80eb0dd672925a0737e09643d8a889abd Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:15 +0800 +Subject: [PATCH 036/101] vfio/ccw: Allow the selection of a given iommu + backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [35/67] 1701de023a9f3b3f0420689bf851e11aee88800d (eauger1/centos-qemu-kvm) + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-ccw device: + +If the user wants to use the legacy backend, it shall not +link the vfio-ccw device with any iommufd object: + + -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX + +This is called the legacy mode/backend. 
+ +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-ccw device options: + + -object iommufd,id=iommufd0 + -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit e70f971a6c1230138843d7ab82267e4a5aaf6bda) +Signed-off-by: Eric Auger +--- + hw/vfio/ccw.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index d857bb8d0f..d2d58bb677 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -15,12 +15,14 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #include + #include + + #include "qapi/error.h" + #include "hw/vfio/vfio-common.h" ++#include "sysemu/iommufd.h" + #include "hw/s390x/s390-ccw.h" + #include "hw/s390x/vfio-ccw.h" + #include "hw/qdev-properties.h" +@@ -677,6 +679,10 @@ static void vfio_ccw_unrealize(DeviceState *dev) + static Property vfio_ccw_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), + DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOCCWDevice, vdev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch new file mode 100644 index 0000000..6c91d85 --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch @@ -0,0 +1,93 @@ +From 0ff08afdec19f4decaf750fa7d158e0ea498ff28 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:16 +0800 +Subject: [PATCH 037/101] vfio/ccw: Make vfio cdev 
pre-openable by passing a + file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [36/67] cc0d8f51cffa5d5a7aebc2334b908b9877179ae7 (eauger1/centos-qemu-kvm) + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 909a6254edaa8d0b0e3f1c0a623862e73d1842e9) +Signed-off-by: Eric Auger +--- + hw/vfio/ccw.c | 25 ++++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index d2d58bb677..2afdf17dbe 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -590,11 +590,12 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) + } + } + ++ if (vfio_device_get_name(vbasedev, errp) < 0) { ++ return; ++ } ++ + vbasedev->ops = &vfio_ccw_ops; + vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, +- vcdev->cdev.hostid.ssid, +- vcdev->cdev.hostid.devid); + vbasedev->dev = dev; + + /* +@@ -691,12 +692,29 @@ static const VMStateDescription vfio_ccw_vmstate = { + .unmigratable = 1, + }; + ++static void vfio_ccw_instance_init(Object *obj) ++{ ++ VFIOCCWDevice *vcdev = VFIO_CCW(obj); ++ ++ vcdev->vdev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_ccw_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_CCW(obj)->vdev, str, errp); ++} ++#endif ++ + static void vfio_ccw_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + 
S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + device_class_set_props(dc, vfio_ccw_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_ccw_set_fd); ++#endif + dc->vmsd = &vfio_ccw_vmstate; + dc->desc = "VFIO-based subchannel assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +@@ -714,6 +732,7 @@ static const TypeInfo vfio_ccw_info = { + .name = TYPE_VFIO_CCW, + .parent = TYPE_S390_CCW, + .instance_size = sizeof(VFIOCCWDevice), ++ .instance_init = vfio_ccw_instance_init, + .class_init = vfio_ccw_class_init, + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch b/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch new file mode 100644 index 0000000..95b85f9 --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch @@ -0,0 +1,85 @@ +From 2ef1c050722115247962e3cd4d8fcf73727e597e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:24 +0800 +Subject: [PATCH 045/101] vfio/ccw: Move VFIODevice initializations in + vfio_ccw_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [44/67] 3345ed58f491aba8fd51bcc172af267ae53e6c8c (eauger1/centos-qemu-kvm) + +Some of the VFIODevice initializations is in vfio_ccw_realize, +move all of them in vfio_ccw_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit c12b55ad6f9d3b4792b590e9211bd7319e4a2d70) +Signed-off-by: Eric Auger +--- + hw/vfio/ccw.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 2afdf17dbe..6305a4c1b8 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -594,20 +594,6 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) + return; + } + +- vbasedev->ops = &vfio_ccw_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->dev = dev; +- +- /* +- * All vfio-ccw devices are believed to operate in a way compatible with +- * discarding of memory in RAM blocks, ie. pages pinned in the host are +- * in the current working set of the guest driver and therefore never +- * overlap e.g., with pages available to the guest balloon driver. This +- * needs to be set before vfio_get_device() for vfio common to handle +- * ram_block_discard_disable(). +- */ +- vbasedev->ram_block_discard_allowed = true; +- + ret = vfio_attach_device(cdev->mdevid, vbasedev, + &address_space_memory, errp); + if (ret) { +@@ -695,8 +681,22 @@ static const VMStateDescription vfio_ccw_vmstate = { + static void vfio_ccw_instance_init(Object *obj) + { + VFIOCCWDevice *vcdev = VFIO_CCW(obj); ++ VFIODevice *vbasedev = &vcdev->vdev; ++ ++ vbasedev->type = VFIO_DEVICE_TYPE_CCW; ++ vbasedev->ops = &vfio_ccw_ops; ++ vbasedev->dev = DEVICE(vcdev); ++ vbasedev->fd = -1; + +- vcdev->vdev.fd = -1; ++ /* ++ * All vfio-ccw devices are believed to operate in a way compatible with ++ * discarding of memory in RAM blocks, ie. pages pinned in the host are ++ * in the current working set of the guest driver and therefore never ++ * overlap e.g., with pages available to the guest balloon driver. 
This ++ * needs to be set before vfio_get_device() for vfio common to handle ++ * ram_block_discard_disable(). ++ */ ++ vbasedev->ram_block_discard_allowed = true; + } + + #ifdef CONFIG_IOMMUFD +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch b/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch new file mode 100644 index 0000000..8615b6d --- /dev/null +++ b/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch @@ -0,0 +1,98 @@ +From 7de36998dd6177380e46b8c5f3a91c3fad75483c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:30 +0800 +Subject: [PATCH 005/101] vfio/common: Introduce vfio_container_init/destroy + helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [4/67] 8287f687ef19cd84afede1e8f3b16ac3caf29a1d (eauger1/centos-qemu-kvm) + +This adds two helper functions vfio_container_init/destroy which will be +used by both legacy and iommufd containers to do base container specific +initialization and release. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit ed2f7f80170251e7cdd2965a13ee97527d1fbec8) +Signed-off-by: Eric Auger +--- + hw/vfio/container-base.c | 9 +++++++++ + hw/vfio/container.c | 4 +++- + include/hw/vfio/vfio-container-base.h | 4 ++++ + 3 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 55d3a35fa4..e929435751 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -30,3 +30,12 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + g_assert(bcontainer->ops->dma_unmap); + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } ++ ++void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) ++{ ++ bcontainer->ops = ops; ++} ++ ++void vfio_container_destroy(VFIOContainerBase *bcontainer) ++{ ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index c04df26323..32a0251dd1 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -559,7 +559,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); + bcontainer = &container->bcontainer; +- bcontainer->ops = &vfio_legacy_ops; ++ vfio_container_init(bcontainer, &vfio_legacy_ops); + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -661,6 +661,7 @@ put_space_exit: + static void vfio_disconnect_container(VFIOGroup *group) + { + VFIOContainer *container = group->container; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + + QLIST_REMOVE(group, container_next); + group->container = NULL; +@@ -695,6 +696,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } ++ vfio_container_destroy(bcontainer); + + trace_vfio_disconnect_container(container->fd); + close(container->fd); 
+diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 56b033f59f..577f52ccbc 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -38,6 +38,10 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); + ++void vfio_container_init(VFIOContainerBase *bcontainer, ++ const VFIOIOMMUOps *ops); ++void vfio_container_destroy(VFIOContainerBase *bcontainer); ++ + struct VFIOIOMMUOps { + /* basic feature */ + int (*dma_map)(VFIOContainerBase *bcontainer, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch b/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch new file mode 100644 index 0000000..eec555b --- /dev/null +++ b/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch @@ -0,0 +1,221 @@ +From 36f4005c3dbb4c8b63a975494c75281de51c25f9 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:31 +0800 +Subject: [PATCH 006/101] vfio/common: Move giommu_list in base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [5/67] ba5898e96c16c7f6e8108ae461b454d3c8c35404 (eauger1/centos-qemu-kvm) + +Move the giommu_list field in the base container and store +the base container in the VFIOGuestIOMMU. + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit dddf83ab99eb832c449249397a1c302c6ed746bf) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 17 +++++++++++------ + hw/vfio/container-base.c | 9 +++++++++ + hw/vfio/container.c | 8 -------- + include/hw/vfio/vfio-common.h | 9 --------- + include/hw/vfio/vfio-container-base.h | 9 +++++++++ + 5 files changed, 29 insertions(+), 23 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index e610771888..43580bcc43 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +- VFIOContainerBase *bcontainer = &giommu->container->bcontainer; ++ VFIOContainerBase *bcontainer = giommu->bcontainer; + hwaddr iova = iotlb->iova + giommu->iommu_offset; + void *vaddr; + int ret; +@@ -569,6 +569,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -612,7 +613,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + giommu->iommu_mr = iommu_mr; + giommu->iommu_offset = section->offset_within_address_space - + section->offset_within_region; +- giommu->container = container; ++ giommu->bcontainer = bcontainer; + llend = int128_add(int128_make64(section->offset_within_region), + section->size); + llend = int128_sub(llend, int128_one()); +@@ -647,7 +648,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + g_free(giommu); + goto fail; + } +- 
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); ++ QLIST_INSERT_HEAD(&bcontainer->giommu_list, giommu, giommu_next); + memory_region_iommu_replay(giommu->iommu_mr, &giommu->n); + + return; +@@ -732,6 +733,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -744,7 +746,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + +- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { ++ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + memory_region_unregister_iommu_notifier(section->mr, +@@ -1206,7 +1208,9 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + vfio_giommu_dirty_notifier *gdn = container_of(n, + vfio_giommu_dirty_notifier, n); + VFIOGuestIOMMU *giommu = gdn->giommu; +- VFIOContainer *container = giommu->container; ++ VFIOContainerBase *bcontainer = giommu->bcontainer; ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova = iotlb->iova + giommu->iommu_offset; + ram_addr_t translated_addr; + int ret = -EINVAL; +@@ -1284,12 +1288,13 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, + static int vfio_sync_dirty_bitmap(VFIOContainer *container, + MemoryRegionSection *section) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + ram_addr_t ram_addr; + + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + +- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { ++ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if 
(MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + Int128 llend; +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index e929435751..20bcb9669a 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -34,8 +34,17 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; ++ QLIST_INIT(&bcontainer->giommu_list); + } + + void vfio_container_destroy(VFIOContainerBase *bcontainer) + { ++ VFIOGuestIOMMU *giommu, *tmp; ++ ++ QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { ++ memory_region_unregister_iommu_notifier( ++ MEMORY_REGION(giommu->iommu_mr), &giommu->n); ++ QLIST_REMOVE(giommu, giommu_next); ++ g_free(giommu); ++ } + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 32a0251dd1..133d3c8f5c 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -556,7 +556,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->dirty_pages_supported = false; + container->dma_max_mappings = 0; + container->iova_ranges = NULL; +- QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, &vfio_legacy_ops); +@@ -686,16 +685,9 @@ static void vfio_disconnect_container(VFIOGroup *group) + + if (QLIST_EMPTY(&container->group_list)) { + VFIOAddressSpace *space = container->space; +- VFIOGuestIOMMU *giommu, *tmp; + + QLIST_REMOVE(container, next); + +- QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { +- memory_region_unregister_iommu_notifier( +- MEMORY_REGION(giommu->iommu_mr), &giommu->n); +- QLIST_REMOVE(giommu, giommu_next); +- g_free(giommu); +- } + vfio_container_destroy(bcontainer); + + trace_vfio_disconnect_container(container->fd); +diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h +index 24a26345e5..6be082b8f2 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -95,7 +95,6 @@ typedef struct VFIOContainer { + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + unsigned int dma_max_mappings; +- QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +@@ -104,14 +103,6 @@ typedef struct VFIOContainer { + GList *iova_ranges; + } VFIOContainer; + +-typedef struct VFIOGuestIOMMU { +- VFIOContainer *container; +- IOMMUMemoryRegion *iommu_mr; +- hwaddr iommu_offset; +- IOMMUNotifier n; +- QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; +-} VFIOGuestIOMMU; +- + typedef struct VFIORamDiscardListener { + VFIOContainer *container; + MemoryRegion *mr; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 577f52ccbc..a11aec5755 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -29,8 +29,17 @@ typedef struct { + */ + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; ++ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + } VFIOContainerBase; + ++typedef struct VFIOGuestIOMMU { ++ VFIOContainerBase *bcontainer; ++ IOMMUMemoryRegion *iommu_mr; ++ hwaddr iommu_offset; ++ IOMMUNotifier n; ++ QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; ++} VFIOGuestIOMMU; ++ + int vfio_container_dma_map(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch b/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch new file mode 100644 index 0000000..261807a --- /dev/null +++ b/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch @@ -0,0 +1,55 @@ +From e9476ee64edd81fafd409fb3ceaad80668446bff Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 
Nov 2023 16:44:02 +0800 +Subject: [PATCH 023/101] vfio/common: return early if space isn't empty +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [22/67] 239c21ae7cddc8efabc041b9c7774f15b4964631 (eauger1/centos-qemu-kvm) + +This is a trivial optimization. If there is active container in space, +vfio_reset_handler will never be unregistered. So revert the check of +space->containers and return early. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 1eae5b7bd3ddd03b5591e9122b011c6520064a5a) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 572ae7c934..934f4f5446 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1462,10 +1462,13 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) + + void vfio_put_address_space(VFIOAddressSpace *space) + { +- if (QLIST_EMPTY(&space->containers)) { +- QLIST_REMOVE(space, list); +- g_free(space); ++ if (!QLIST_EMPTY(&space->containers)) { ++ return; + } ++ ++ QLIST_REMOVE(space, list); ++ g_free(space); ++ + if (QLIST_EMPTY(&vfio_address_spaces)) { + qemu_unregister_reset(vfio_reset_handler, NULL); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch b/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch new file mode 100644 index 0000000..62caf8a --- /dev/null +++ b/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch @@ -0,0 +1,257 @@ +From facad966c42b1ec38b12e45f2b84bd059542b60c Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:35 
+0800 +Subject: [PATCH 010/101] vfio/container: Convert functions to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [9/67] a0002d6e9cb0ca76e3e2f25208ecba22dd9f9a88 (eauger1/centos-qemu-kvm) + +In the prospect to get rid of VFIOContainer refs +in common.c lets convert misc functions to use the base +container object instead: + +vfio_devices_all_dirty_tracking +vfio_devices_all_device_dirty_tracking +vfio_devices_all_running_and_mig_active +vfio_devices_query_dirty_bitmap +vfio_get_dirty_bitmap + +Signed-off-by: Eric Auger +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit e1cac6b203f45b5322e831e8d50edfdf18609b09) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 42 +++++++++++++++-------------------- + hw/vfio/container.c | 6 ++--- + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 9 ++++---- + 4 files changed, 26 insertions(+), 33 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 9415395ed9..cf6618f6ed 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -177,9 +177,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) + migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P; + } + +-static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) ++static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +@@ -204,9 +203,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + return true; + } + +-bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) ++bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase 
*bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { +@@ -222,9 +220,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. + */ +-bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) ++bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + if (!migration_is_active(migrate_get_current())) { +@@ -1082,7 +1079,7 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + int ret; + +- if (vfio_devices_all_device_dirty_tracking(container)) { ++ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { + ret = vfio_devices_dma_logging_start(container); + } else { + ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +@@ -1101,7 +1098,7 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + int ret = 0; + +- if (vfio_devices_all_device_dirty_tracking(container)) { ++ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { + vfio_devices_dma_logging_stop(container); + } else { + ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +@@ -1141,11 +1138,10 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, + return 0; + } + +-int vfio_devices_query_dirty_bitmap(VFIOContainer *container, ++int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret; 
+ +@@ -1165,17 +1161,16 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + return 0; + } + +-int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, ++int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr) + { + bool all_device_dirty_tracking = +- vfio_devices_all_device_dirty_tracking(container); ++ vfio_devices_all_device_dirty_tracking(bcontainer); + uint64_t dirty_pages; + VFIOBitmap vbmap; + int ret; + +- if (!container->bcontainer.dirty_pages_supported && +- !all_device_dirty_tracking) { ++ if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); +@@ -1188,10 +1183,9 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + } + + if (all_device_dirty_tracking) { +- ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); ++ ret = vfio_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size); + } else { +- ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, +- iova, size); ++ ret = vfio_container_query_dirty_bitmap(bcontainer, &vbmap, iova, size); + } + + if (ret) { +@@ -1201,8 +1195,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, + vbmap.pages); + +- trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size, +- ram_addr, dirty_pages); ++ trace_vfio_get_dirty_bitmap(iova, size, vbmap.size, ram_addr, dirty_pages); + out: + g_free(vbmap.bitmap); + +@@ -1236,8 +1229,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + + rcu_read_lock(); + if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { +- ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1, +- translated_addr); ++ ret = vfio_get_dirty_bitmap(&container->bcontainer, iova, ++ 
iotlb->addr_mask + 1, translated_addr); + if (ret) { + error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +@@ -1266,7 +1259,8 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, + * Sync the whole mapped region (spanning multiple individual mappings) + * in one go. + */ +- return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr); ++ return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, ++ ram_addr); + } + + static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, +@@ -1335,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + +- return vfio_get_dirty_bitmap(container, ++ return vfio_get_dirty_bitmap(&container->bcontainer, + REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), + int128_get64(section->size), ram_addr); + } +@@ -1350,7 +1344,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, + return; + } + +- if (vfio_devices_all_dirty_tracking(container)) { ++ if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { + ret = vfio_sync_dirty_bitmap(container, section); + if (ret) { + error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 63a906de93..7bd81eab09 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -129,8 +129,8 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + bool need_dirty_sync = false; + int ret; + +- if (iotlb && vfio_devices_all_running_and_mig_active(container)) { +- if (!vfio_devices_all_device_dirty_tracking(container) && ++ if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { ++ if (!vfio_devices_all_device_dirty_tracking(bcontainer) && + container->bcontainer.dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } 
+@@ -162,7 +162,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + } + + if (need_dirty_sync) { +- ret = vfio_get_dirty_bitmap(container, iova, size, ++ ret = vfio_get_dirty_bitmap(bcontainer, iova, size, + iotlb->translated_addr); + if (ret) { + return ret; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 9f7fedee98..08a1f9dfa4 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -117,7 +117,7 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" + vfio_legacy_dma_unmap_overflow_workaround(void) "" +-vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 ++vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 + vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 + + # platform.c +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 9740cf9fbc..bc67e1316c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -186,7 +186,6 @@ typedef struct VFIODisplay { + + VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); +-bool vfio_devices_all_running_and_saving(VFIOContainer *container); + + /* SPAPR specific */ + int vfio_container_add_section_window(VFIOContainer *container, +@@ -260,11 +259,11 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error 
**errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); +-bool vfio_devices_all_running_and_mig_active(VFIOContainer *container); +-bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container); +-int vfio_devices_query_dirty_bitmap(VFIOContainer *container, ++bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); ++bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); ++int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size); +-int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, ++int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch b/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch new file mode 100644 index 0000000..92e9a38 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch @@ -0,0 +1,97 @@ +From a5d19bfbfddb36fa6d68ca6282a5acd9b245d48a Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:41 +0800 +Subject: [PATCH 016/101] vfio/container: Implement attach/detach_device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [15/67] e233c90e4af2061dc0612bc1b1d17be1a47daeae (eauger1/centos-qemu-kvm) + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 1eb31f13b24c49884d8256f96a6664df2dd0824d) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 16 ++++++++++++++++ + hw/vfio/container.c | 12 +++++------- + 2 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 8ef2e7967d..483ba82089 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1498,3 +1498,19 @@ retry: + + return info; + } ++ ++int vfio_attach_device(char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) ++{ ++ const VFIOIOMMUOps *ops = &vfio_legacy_ops; ++ ++ return ops->attach_device(name, vbasedev, as, errp); ++} ++ ++void vfio_detach_device(VFIODevice *vbasedev) ++{ ++ if (!vbasedev->bcontainer) { ++ return; ++ } ++ vbasedev->bcontainer->ops->detach_device(vbasedev); ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 721c0d7375..6bacf38222 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -873,8 +873,8 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) + * @name and @vbasedev->name are likely to be different depending + * on the type of the device, hence the need for passing @name + */ +-int vfio_attach_device(char *name, VFIODevice *vbasedev, +- AddressSpace *as, Error **errp) ++static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) + { + int groupid = vfio_device_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; +@@ -914,14 +914,10 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + return ret; + } + +-void vfio_detach_device(VFIODevice *vbasedev) ++static void vfio_legacy_detach_device(VFIODevice *vbasedev) + { + VFIOGroup *group = vbasedev->group; + +- if (!vbasedev->bcontainer) { +- return; +- } +- + QLIST_REMOVE(vbasedev, global_next); + 
QLIST_REMOVE(vbasedev, container_next); + vbasedev->bcontainer = NULL; +@@ -933,6 +929,8 @@ void vfio_detach_device(VFIODevice *vbasedev) + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, ++ .attach_device = vfio_legacy_attach_device, ++ .detach_device = vfio_legacy_detach_device, + .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, + .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, + }; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch b/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch new file mode 100644 index 0000000..42b406b --- /dev/null +++ b/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch @@ -0,0 +1,65 @@ +From c3c9f366c356032fa57ff7cc664732ba87ceb3fb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:18 +0100 +Subject: [PATCH 051/101] vfio/container: Initialize VFIOIOMMUOps under + vfio_init_container() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [50/67] f325136391b22babadb1be3394c527deecdcd3ca (eauger1/centos-qemu-kvm) + +vfio_init_container() already defines the IOMMU type of the container. +Do the same for the VFIOIOMMUOps struct. This prepares ground for the +following patches that will deduce the associated VFIOIOMMUOps struct +from the IOMMU type. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit bffe92af0e7571868d47a1d1cd2205e13054d492) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index afcfe80488..f4a0434a52 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -370,7 +370,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, + } + + static int vfio_init_container(VFIOContainer *container, int group_fd, +- Error **errp) ++ VFIOAddressSpace *space, Error **errp) + { + int iommu_type, ret; + +@@ -401,6 +401,7 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + } + + container->iommu_type = iommu_type; ++ vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); + return 0; + } + +@@ -583,9 +584,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->fd = fd; + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, space, &vfio_legacy_ops); + +- ret = vfio_init_container(container, group->fd, errp); ++ ret = vfio_init_container(container, group->fd, space, errp); + if (ret) { + goto free_container_exit; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch b/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch new file mode 100644 index 0000000..3411ecb --- /dev/null +++ b/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch @@ -0,0 +1,55 @@ +From 29f13011e62f5370ef7fb3248dc85c90ae5bb042 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:21 +0100 +Subject: [PATCH 054/101] vfio/container: Intoduce a new VFIOIOMMUClass::setup + handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: 
Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [53/67] 8641161afc33d68795bcf51a47e89061b34d50a8 (eauger1/centos-qemu-kvm) + +This will help in converting the sPAPR IOMMU backend to a QOM interface. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit 61d893f2cdb34a2b0255f9b5fbba6b49b94ff730) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 1 + + include/hw/vfio/vfio-container-base.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 220e838a91..c22bdd3216 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1129,6 +1129,7 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) + { + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + ++ vioc->setup = vfio_legacy_setup; + vioc->dma_map = vfio_legacy_dma_map; + vioc->dma_unmap = vfio_legacy_dma_unmap; + vioc->attach_device = vfio_legacy_attach_device; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index c60370fc5e..ce8b1fba88 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -109,6 +109,7 @@ struct VFIOIOMMUClass { + InterfaceClass parent_class; + + /* basic feature */ ++ int (*setup)(VFIOContainerBase *bcontainer, Error **errp); + int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch new file mode 100644 index 0000000..7139e64 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch @@ -0,0 +1,143 @@ +From 5b63e4595e106196ef922b7f762c8f4150d73979 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:19 +0100 +Subject: [PATCH 052/101] vfio/container: Introduce a VFIOIOMMU QOM interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [51/67] 7c06e2165efe94dcd203d44e422a7aa9fac9816c (eauger1/centos-qemu-kvm) + +VFIOContainerBase was not introduced as an abstract QOM object because +it felt unnecessary to expose all the IOMMU backends to the QEMU +machine and human interface. However, we can still abstract the IOMMU +backend handlers using a QOM interface class. This provides more +flexibility when referencing the various implementations. + +Simply transform the VFIOIOMMUOps struct in an InterfaceClass and do +some initial name replacements. Next changes will start converting +VFIOIOMMUOps. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit fdaa774e67435a328c0e28006c4d749f2198294a) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 2 +- + hw/vfio/container-base.c | 12 +++++++++++- + hw/vfio/pci.c | 2 +- + include/hw/vfio/vfio-container-base.h | 23 +++++++++++++++++++---- + 4 files changed, 32 insertions(+), 7 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 08a3e57672..49dab41566 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1503,7 +1503,7 @@ retry: + int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) + { +- const VFIOIOMMUOps *ops = &vfio_legacy_ops; ++ const VFIOIOMMUClass *ops = &vfio_legacy_ops; + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 1ffd25bbfa..913ae49077 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -72,7 +72,7 @@ int 
vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + } + + void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, +- const VFIOIOMMUOps *ops) ++ const VFIOIOMMUClass *ops) + { + bcontainer->ops = ops; + bcontainer->space = space; +@@ -99,3 +99,13 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + + g_list_free_full(bcontainer->iova_ranges, g_free); + } ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU, ++ .parent = TYPE_INTERFACE, ++ .class_size = sizeof(VFIOIOMMUClass), ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 83c3238608..adb7c09367 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2491,7 +2491,7 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + { + VFIODevice *vbasedev = &vdev->vbasedev; +- const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; ++ const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops; + + return ops->pci_hot_reset(vbasedev, single); + } +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 5c9594b6c7..d6147b4aee 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -16,7 +16,8 @@ + #include "exec/memory.h" + + typedef struct VFIODevice VFIODevice; +-typedef struct VFIOIOMMUOps VFIOIOMMUOps; ++typedef struct VFIOIOMMUClass VFIOIOMMUClass; ++#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ + + typedef struct { + unsigned long *bitmap; +@@ -34,7 +35,7 @@ typedef struct VFIOAddressSpace { + * This is the base object for vfio container backends + */ + typedef struct VFIOContainerBase { +- const VFIOIOMMUOps *ops; ++ const VFIOIOMMUClass *ops; + VFIOAddressSpace *space; + MemoryListener listener; + Error *error; +@@ -88,10 +89,24 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + + void vfio_container_init(VFIOContainerBase 
*bcontainer, + VFIOAddressSpace *space, +- const VFIOIOMMUOps *ops); ++ const VFIOIOMMUClass *ops); + void vfio_container_destroy(VFIOContainerBase *bcontainer); + +-struct VFIOIOMMUOps { ++ ++#define TYPE_VFIO_IOMMU "vfio-iommu" ++ ++/* ++ * VFIOContainerBase is not an abstract QOM object because it felt ++ * unnecessary to expose all the IOMMU backends to the QEMU machine ++ * and human interface. However, we can still abstract the IOMMU ++ * backend handlers using a QOM interface class. This provides more ++ * flexibility when referencing the various implementations. ++ */ ++DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) ++ ++struct VFIOIOMMUClass { ++ InterfaceClass parent_class; ++ + /* basic feature */ + int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch new file mode 100644 index 0000000..60439ff --- /dev/null +++ b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch @@ -0,0 +1,168 @@ +From 58927bf236541b9423f855eca1970f7a3cf864a9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:20 +0100 +Subject: [PATCH 053/101] vfio/container: Introduce a VFIOIOMMU legacy QOM + interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [52/67] a81f39d13305e84699313e17ae64d10ff4b09067 (eauger1/centos-qemu-kvm) + +Convert the legacy VFIOIOMMUOps struct to the new VFIOIOMMU QOM +interface. The set of of operations for this backend can be referenced +with a literal typename instead of a C struct. This will simplify +support of multiple backends. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit 9812feefab3a4ff95a6cfd73aecb120b406bc98c) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 6 ++- + hw/vfio/container.c | 58 ++++++++++++++++++++++----- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 1 + + 4 files changed, 55 insertions(+), 11 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 49dab41566..2329d0efc8 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1503,13 +1503,17 @@ retry: + int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) + { +- const VFIOIOMMUClass *ops = &vfio_legacy_ops; ++ const VFIOIOMMUClass *ops = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { + ops = &vfio_iommufd_ops; + } + #endif ++ ++ assert(ops); ++ + return ops->attach_device(name, vbasedev, as, errp); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index f4a0434a52..220e838a91 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -369,10 +369,30 @@ static int vfio_get_iommu_type(VFIOContainer *container, + return -EINVAL; + } + ++/* ++ * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type ++ */ ++static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) ++{ ++ ObjectClass *klass = NULL; ++ ++ switch (iommu_type) { ++ case VFIO_TYPE1v2_IOMMU: ++ case VFIO_TYPE1_IOMMU: ++ klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); ++ break; ++ default: ++ g_assert_not_reached(); ++ }; ++ ++ return VFIO_IOMMU_CLASS(klass); ++} ++ + static int vfio_init_container(VFIOContainer *container, int group_fd, + VFIOAddressSpace *space, Error **errp) + { + int iommu_type, ret; ++ const VFIOIOMMUClass *vioc; + + iommu_type = vfio_get_iommu_type(container, errp); + if (iommu_type < 0) { +@@ -401,7 +421,14 @@ static int 
vfio_init_container(VFIOContainer *container, int group_fd, + } + + container->iommu_type = iommu_type; +- vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); ++ ++ vioc = vfio_get_iommu_class(iommu_type, errp); ++ if (!vioc) { ++ error_setg(errp, "No available IOMMU models"); ++ return -EINVAL; ++ } ++ ++ vfio_container_init(&container->bcontainer, space, vioc); + return 0; + } + +@@ -1098,12 +1125,25 @@ out_single: + return ret; + } + +-const VFIOIOMMUOps vfio_legacy_ops = { +- .dma_map = vfio_legacy_dma_map, +- .dma_unmap = vfio_legacy_dma_unmap, +- .attach_device = vfio_legacy_attach_device, +- .detach_device = vfio_legacy_detach_device, +- .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, +- .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, +- .pci_hot_reset = vfio_legacy_pci_hot_reset, ++static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->dma_map = vfio_legacy_dma_map; ++ vioc->dma_unmap = vfio_legacy_dma_unmap; ++ vioc->attach_device = vfio_legacy_attach_device; ++ vioc->detach_device = vfio_legacy_detach_device; ++ vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking; ++ vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap; ++ vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; + }; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_LEGACY, ++ .parent = TYPE_VFIO_IOMMU, ++ .class_init = vfio_iommu_legacy_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b8aa8a5495..14c497b6b0 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -210,7 +210,6 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +-extern const VFIOIOMMUOps 
vfio_legacy_ops; + extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index d6147b4aee..c60370fc5e 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -94,6 +94,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + + #define TYPE_VFIO_IOMMU "vfio-iommu" ++#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch b/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch new file mode 100644 index 0000000..2840e2c --- /dev/null +++ b/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch @@ -0,0 +1,71 @@ +From e56f961fbe95a53a52c5eca00b4fca17d825e860 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:28 +0800 +Subject: [PATCH 003/101] vfio/container: Introduce a empty VFIOIOMMUOps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [2/67] 0de0afffce42fa4a17f6d33a10b6162cdfbe8150 (eauger1/centos-qemu-kvm) + +This empty VFIOIOMMUOps named vfio_legacy_ops will hold all general +IOMMU ops of legacy container. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit d24668579184f4098779983724ec74cd3db62e10) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 5 +++++ + include/hw/vfio/vfio-common.h | 2 +- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 242010036a..4bc43ddfa4 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -472,6 +472,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { + VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + int ret, fd; + VFIOAddressSpace *space; + +@@ -552,6 +553,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->iova_ranges = NULL; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); ++ bcontainer = &container->bcontainer; ++ bcontainer->ops = &vfio_legacy_ops; + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -933,3 +936,5 @@ void vfio_detach_device(VFIODevice *vbasedev) + vfio_put_base_device(vbasedev); + vfio_put_group(group); + } ++ ++const VFIOIOMMUOps vfio_legacy_ops; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 586d153c12..678161f207 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -255,7 +255,7 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +- ++extern const VFIOIOMMUOps vfio_legacy_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch b/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch new file mode 100644 index 0000000..ae9ccd8 --- /dev/null +++ 
b/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch @@ -0,0 +1,118 @@ +From 6c7546756e979e4f5ba29ae51a21c63fa90492cf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:17 +0100 +Subject: [PATCH 050/101] vfio/container: Introduce vfio_legacy_setup() for + further cleanups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [49/67] 3a621ba2605c98b7fbf7fd9f93a207f728f1202e (eauger1/centos-qemu-kvm) + +This will help subsequent patches to unify the initialization of type1 +and sPAPR IOMMU backends. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit d3764db87531cd53849ccee9b2f72aede90ccf5b) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 63 +++++++++++++++++++++++++-------------------- + 1 file changed, 35 insertions(+), 28 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 1e77a2929e..afcfe80488 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -474,6 +474,35 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + } + } + ++static int vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) ++{ ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ g_autofree struct vfio_iommu_type1_info *info = NULL; ++ int ret; ++ ++ ret = vfio_get_iommu_info(container, &info); ++ if (ret) { ++ error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); ++ return ret; ++ } ++ ++ if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { ++ bcontainer->pgsizes = info->iova_pgsizes; ++ } else { ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ } ++ ++ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { ++ 
bcontainer->dma_max_mappings = 65535; ++ } ++ ++ vfio_get_info_iova_range(info, bcontainer); ++ ++ vfio_get_iommu_info_migration(container, info); ++ return 0; ++} ++ + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { +@@ -570,40 +599,18 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: +- { +- struct vfio_iommu_type1_info *info; +- +- ret = vfio_get_iommu_info(container, &info); +- if (ret) { +- error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); +- goto enable_discards_exit; +- } +- +- if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { +- bcontainer->pgsizes = info->iova_pgsizes; +- } else { +- bcontainer->pgsizes = qemu_real_host_page_size(); +- } +- +- if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { +- bcontainer->dma_max_mappings = 65535; +- } +- +- vfio_get_info_iova_range(info, bcontainer); +- +- vfio_get_iommu_info_migration(container, info); +- g_free(info); ++ ret = vfio_legacy_setup(bcontainer, errp); + break; +- } + case VFIO_SPAPR_TCE_v2_IOMMU: + case VFIO_SPAPR_TCE_IOMMU: +- { + ret = vfio_spapr_container_init(container, errp); +- if (ret) { +- goto enable_discards_exit; +- } + break; ++ default: ++ g_assert_not_reached(); + } ++ ++ if (ret) { ++ goto enable_discards_exit; + } + + vfio_kvm_device_add_group(group); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch b/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch new file mode 100644 index 0000000..3d46a06 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch @@ -0,0 +1,102 @@ +From 6a597d7c82a4538fa1f928db7e600ec2e5a44361 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:39 +0800 +Subject: [PATCH 014/101] vfio/container: Move dirty_pgsizes and + max_dirty_bitmap_size to base container +MIME-Version: 
1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [13/67] b9fe57174368e36788b017cc2ad13b748592cfc2 (eauger1/centos-qemu-kvm) + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 4d6b95010c59127ac4f7230d6ee88b5d0e99738c) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 9 +++++---- + include/hw/vfio/vfio-common.h | 2 -- + include/hw/vfio/vfio-container-base.h | 2 ++ + 3 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 5c1dee8c9f..c8088a8174 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -64,6 +64,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_type1_dma_unmap *unmap; + struct vfio_bitmap *bitmap; + VFIOBitmap vbmap; +@@ -91,7 +92,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, + bitmap->size = vbmap.size; + bitmap->data = (__u64 *)vbmap.bitmap; + +- if (vbmap.size > container->max_dirty_bitmap_size) { ++ if (vbmap.size > bcontainer->max_dirty_bitmap_size) { + error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); + ret = -E2BIG; + goto unmap_exit; +@@ -131,7 +132,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + + if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { + if (!vfio_devices_all_device_dirty_tracking(bcontainer) && +- container->bcontainer.dirty_pages_supported) { ++ bcontainer->dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); 
+ } + +@@ -469,8 +470,8 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { + bcontainer->dirty_pages_supported = true; +- container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; +- container->dirty_pgsizes = cap_mig->pgsize_bitmap; ++ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; ++ bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap; + } + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 922022cbc6..b1c9fe711b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -80,8 +80,6 @@ typedef struct VFIOContainer { + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener prereg_listener; + unsigned iommu_type; +- uint64_t dirty_pgsizes; +- uint64_t max_dirty_bitmap_size; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + GList *iova_ranges; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 95f8d319e0..80e4a993c5 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -39,6 +39,8 @@ typedef struct VFIOContainerBase { + MemoryListener listener; + Error *error; + bool initialized; ++ uint64_t dirty_pgsizes; ++ uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + unsigned int dma_max_mappings; + bool dirty_pages_supported; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch new file mode 100644 index 0000000..c9c79b6 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch @@ -0,0 +1,168 @@ +From 882143ef30da4182f049eb8192e0fac317c372b3 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:40 +0800 +Subject: [PATCH 015/101] vfio/container: Move iova_ranges to base 
container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [14/67] 49f2e3c484b4c0c63be9aa4eb1bf08804dcb1ec3 (eauger1/centos-qemu-kvm) + +Meanwhile remove the helper function vfio_free_container as it +only calls g_free now. + +No functional change intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit f79baf8c9575ac3193ca86ec508791c86d96b13e) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 5 +++-- + hw/vfio/container-base.c | 3 +++ + hw/vfio/container.c | 19 ++++++------------- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 1 + + 5 files changed, 13 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index be623e544b..8ef2e7967d 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -637,9 +637,10 @@ static void vfio_listener_region_add(MemoryListener *listener, + goto fail; + } + +- if (container->iova_ranges) { ++ if (bcontainer->iova_ranges) { + ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, +- container->iova_ranges, &err); ++ bcontainer->iova_ranges, ++ &err); + if (ret) { + g_free(giommu); + goto fail; +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 7f508669f5..0177f43741 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; ++ bcontainer->iova_ranges = NULL; + QLIST_INIT(&bcontainer->giommu_list); + QLIST_INIT(&bcontainer->vrdl_list); + } +@@ -70,4 +71,6 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + 
QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } ++ ++ g_list_free_full(bcontainer->iova_ranges, g_free); + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index c8088a8174..721c0d7375 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -308,7 +308,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, + } + + static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, +- VFIOContainer *container) ++ VFIOContainerBase *bcontainer) + { + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_iova_range *cap; +@@ -326,8 +326,8 @@ static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, + + range_set_bounds(range, cap->iova_ranges[i].start, + cap->iova_ranges[i].end); +- container->iova_ranges = +- range_list_insert(container->iova_ranges, range); ++ bcontainer->iova_ranges = ++ range_list_insert(bcontainer->iova_ranges, range); + } + + return true; +@@ -475,12 +475,6 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + } + } + +-static void vfio_free_container(VFIOContainer *container) +-{ +- g_list_free_full(container->iova_ranges, g_free); +- g_free(container); +-} +- + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { +@@ -560,7 +554,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + + container = g_malloc0(sizeof(*container)); + container->fd = fd; +- container->iova_ranges = NULL; + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); + +@@ -597,7 +590,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + bcontainer->dma_max_mappings = 65535; + } + +- vfio_get_info_iova_range(info, container); ++ vfio_get_info_iova_range(info, bcontainer); + + vfio_get_iommu_info_migration(container, info); + g_free(info); +@@ -649,7 +642,7 @@ enable_discards_exit: + vfio_ram_block_discard_disable(container, false); + + free_container_exit: 
+- vfio_free_container(container); ++ g_free(container); + + close_fd_exit: + close(fd); +@@ -693,7 +686,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + + trace_vfio_disconnect_container(container->fd); + close(container->fd); +- vfio_free_container(container); ++ g_free(container); + + vfio_put_address_space(space); + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b1c9fe711b..b9e5a0e64b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -82,7 +82,6 @@ typedef struct VFIOContainer { + unsigned iommu_type; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; +- GList *iova_ranges; + } VFIOContainer; + + typedef struct VFIOHostDMAWindow { +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 80e4a993c5..9658ffb526 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -48,6 +48,7 @@ typedef struct VFIOContainerBase { + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_ENTRY(VFIOContainerBase) next; + QLIST_HEAD(, VFIODevice) device_list; ++ GList *iova_ranges; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch new file mode 100644 index 0000000..3198bfd --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch @@ -0,0 +1,522 @@ +From 36bc7782bb02f81368e3e43a3947d16ad362e137 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:38 +0800 +Subject: [PATCH 013/101] vfio/container: Move listener to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: 
Sebastian Ott +RH-Commit: [12/67] f469ab126c6366170aa2520f9b4d9969d3ae0a04 (eauger1/centos-qemu-kvm) + +Move listener to base container. Also error and initialized fields +are moved at the same time. + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit c7b313d300f161c650d011a5c9da469bcd5d34e4) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 110 +++++++++++++------------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 19 +++-- + hw/vfio/spapr.c | 11 +-- + include/hw/vfio/vfio-common.h | 3 - + include/hw/vfio/vfio-container-base.h | 3 + + 6 files changed, 74 insertions(+), 73 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index f15665789f..be623e544b 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -541,7 +541,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section, + return true; + } + +-static bool vfio_get_section_iova_range(VFIOContainer *container, ++static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, + hwaddr *out_iova, hwaddr *out_end, + Int128 *out_llend) +@@ -569,8 +569,10 @@ static bool vfio_get_section_iova_range(VFIOContainer *container, + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -581,7 +583,8 @@ static void vfio_listener_region_add(MemoryListener *listener, + return; + } + +- if (!vfio_get_section_iova_range(container, section, &iova, &end, 
&llend)) { ++ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, ++ &llend)) { + if (memory_region_is_ram_device(section->mr)) { + trace_vfio_listener_region_add_no_dma_map( + memory_region_name(section->mr), +@@ -688,13 +691,12 @@ static void vfio_listener_region_add(MemoryListener *listener, + } + } + +- ret = vfio_container_dma_map(&container->bcontainer, +- iova, int128_get64(llsize), vaddr, +- section->readonly); ++ ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize), ++ vaddr, section->readonly); + if (ret) { + error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", +- container, iova, int128_get64(llsize), vaddr, ret, ++ bcontainer, iova, int128_get64(llsize), vaddr, ret, + strerror(-ret)); + if (memory_region_is_ram_device(section->mr)) { + /* Allow unexpected mappings not to be fatal for RAM devices */ +@@ -716,9 +718,9 @@ fail: + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. 
+ */ +- if (!container->initialized) { +- if (!container->error) { +- error_propagate_prepend(&container->error, err, ++ if (!bcontainer->initialized) { ++ if (!bcontainer->error) { ++ error_propagate_prepend(&bcontainer->error, err, + "Region %s: ", + memory_region_name(section->mr)); + } else { +@@ -733,8 +735,10 @@ fail: + static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -767,7 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, + */ + } + +- if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) { ++ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, ++ &llend)) { + return; + } + +@@ -790,22 +795,22 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (int128_eq(llsize, int128_2_64())) { + /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ + llsize = int128_rshift(llsize, 1); +- ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ ret = vfio_container_dma_unmap(bcontainer, iova, + int128_get64(llsize), NULL); + if (ret) { + error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, int128_get64(llsize), ret, ++ bcontainer, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + iova += int128_get64(llsize); + } +- ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ ret = vfio_container_dma_unmap(bcontainer, iova, + int128_get64(llsize), NULL); + if (ret) { + error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, int128_get64(llsize), ret, ++ bcontainer, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + } +@@ -825,16 +830,15 @@ typedef struct VFIODirtyRanges { + } VFIODirtyRanges; + + typedef struct VFIODirtyRangesListener { +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + VFIODirtyRanges ranges; + MemoryListener listener; + } VFIODirtyRangesListener; + + static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, +- VFIOContainer *container) ++ VFIOContainerBase *bcontainer) + { + VFIOPCIDevice *pcidev; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + Object *owner; + +@@ -863,7 +867,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, + hwaddr iova, end, *min, *max; + + if (!vfio_listener_valid_section(section, "tracking_update") || +- !vfio_get_section_iova_range(dirty->container, section, ++ !vfio_get_section_iova_range(dirty->bcontainer, section, + &iova, &end, NULL)) { + return; + } +@@ -887,7 +891,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, + * The alternative would be an IOVATree but that has a much bigger runtime + * overhead and unnecessary complexity. 
+ */ +- if (vfio_section_is_vfio_pci(section, dirty->container) && ++ if (vfio_section_is_vfio_pci(section, dirty->bcontainer) && + iova >= UINT32_MAX) { + min = &range->minpci64; + max = &range->maxpci64; +@@ -911,7 +915,7 @@ static const MemoryListener vfio_dirty_tracking_listener = { + .region_add = vfio_dirty_tracking_update, + }; + +-static void vfio_dirty_tracking_init(VFIOContainer *container, ++static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer, + VFIODirtyRanges *ranges) + { + VFIODirtyRangesListener dirty; +@@ -921,10 +925,10 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + dirty.ranges.min64 = UINT64_MAX; + dirty.ranges.minpci64 = UINT64_MAX; + dirty.listener = vfio_dirty_tracking_listener; +- dirty.container = container; ++ dirty.bcontainer = bcontainer; + + memory_listener_register(&dirty.listener, +- container->bcontainer.space->as); ++ bcontainer->space->as); + + *ranges = dirty.ranges; + +@@ -936,12 +940,11 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + memory_listener_unregister(&dirty.listener); + } + +-static void vfio_devices_dma_logging_stop(VFIOContainer *container) ++static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) + { + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + feature->argsz = sizeof(buf); +@@ -962,7 +965,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) + } + + static struct vfio_device_feature * +-vfio_device_feature_dma_logging_start_create(VFIOContainer *container, ++vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer, + VFIODirtyRanges *tracking) + { + struct vfio_device_feature *feature; +@@ -1035,16 +1038,15 @@ static void vfio_device_feature_dma_logging_start_destroy( + g_free(feature); + } + +-static 
int vfio_devices_dma_logging_start(VFIOContainer *container) ++static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer) + { + struct vfio_device_feature *feature; + VFIODirtyRanges ranges; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret = 0; + +- vfio_dirty_tracking_init(container, &ranges); +- feature = vfio_device_feature_dma_logging_start_create(container, ++ vfio_dirty_tracking_init(bcontainer, &ranges); ++ feature = vfio_device_feature_dma_logging_start_create(bcontainer, + &ranges); + if (!feature) { + return -errno; +@@ -1067,7 +1069,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) + + out: + if (ret) { +- vfio_devices_dma_logging_stop(container); ++ vfio_devices_dma_logging_stop(bcontainer); + } + + vfio_device_feature_dma_logging_start_destroy(feature); +@@ -1077,14 +1079,14 @@ out: + + static void vfio_listener_log_global_start(MemoryListener *listener) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret; + +- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { +- ret = vfio_devices_dma_logging_start(container); ++ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ++ ret = vfio_devices_dma_logging_start(bcontainer); + } else { +- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +- true); ++ ret = vfio_container_set_dirty_page_tracking(bcontainer, true); + } + + if (ret) { +@@ -1096,14 +1098,14 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + + static void vfio_listener_log_global_stop(MemoryListener *listener) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret = 0; + +- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { +- 
vfio_devices_dma_logging_stop(container); ++ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ++ vfio_devices_dma_logging_stop(bcontainer); + } else { +- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +- false); ++ ret = vfio_container_set_dirty_page_tracking(bcontainer, false); + } + + if (ret) { +@@ -1214,8 +1216,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + vfio_giommu_dirty_notifier, n); + VFIOGuestIOMMU *giommu = gdn->giommu; + VFIOContainerBase *bcontainer = giommu->bcontainer; +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova = iotlb->iova + giommu->iommu_offset; + ram_addr_t translated_addr; + int ret = -EINVAL; +@@ -1230,12 +1230,12 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + + rcu_read_lock(); + if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { +- ret = vfio_get_dirty_bitmap(&container->bcontainer, iova, +- iotlb->addr_mask + 1, translated_addr); ++ ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, ++ translated_addr); + if (ret) { + error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, iotlb->addr_mask + 1, ret, ++ bcontainer, iova, iotlb->addr_mask + 1, ret, + strerror(-ret)); + } + } +@@ -1291,10 +1291,9 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, + &vrdl); + } + +-static int vfio_sync_dirty_bitmap(VFIOContainer *container, ++static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + ram_addr_t ram_addr; + + if (memory_region_is_iommu(section->mr)) { +@@ -1330,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + +- return 
vfio_get_dirty_bitmap(&container->bcontainer, ++ return vfio_get_dirty_bitmap(bcontainer, + REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), + int128_get64(section->size), ram_addr); + } +@@ -1338,15 +1337,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + static void vfio_listener_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret; + + if (vfio_listener_skipped_section(section)) { + return; + } + +- if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { +- ret = vfio_sync_dirty_bitmap(container, section); ++ if (vfio_devices_all_dirty_tracking(bcontainer)) { ++ ret = vfio_sync_dirty_bitmap(bcontainer, section); + if (ret) { + error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, + strerror(-ret)); +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 584eee4ba1..7f508669f5 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -51,6 +51,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + { + bcontainer->ops = ops; + bcontainer->space = space; ++ bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 6ba2e2f8c4..5c1dee8c9f 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -453,6 +453,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + { + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_migration *cap_mig; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); + if (!hdr) { +@@ -467,7 +468,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer 
*container, + * qemu_real_host_page_size to mark those dirty. + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { +- container->bcontainer.dirty_pages_supported = true; ++ bcontainer->dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; + } +@@ -558,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + + container = g_malloc0(sizeof(*container)); + container->fd = fd; +- container->error = NULL; + container->iova_ranges = NULL; + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); +@@ -621,25 +621,24 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + +- container->listener = vfio_memory_listener; +- +- memory_listener_register(&container->listener, bcontainer->space->as); ++ bcontainer->listener = vfio_memory_listener; ++ memory_listener_register(&bcontainer->listener, bcontainer->space->as); + +- if (container->error) { ++ if (bcontainer->error) { + ret = -1; +- error_propagate_prepend(errp, container->error, ++ error_propagate_prepend(errp, bcontainer->error, + "memory listener initialization failed: "); + goto listener_release_exit; + } + +- container->initialized = true; ++ bcontainer->initialized = true; + + return 0; + listener_release_exit: + QLIST_REMOVE(group, container_next); + QLIST_REMOVE(bcontainer, next); + vfio_kvm_device_del_group(group); +- memory_listener_unregister(&container->listener); ++ memory_listener_unregister(&bcontainer->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); +@@ -674,7 +673,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + * group. 
+ */ + if (QLIST_EMPTY(&container->group_list)) { +- memory_listener_unregister(&container->listener); ++ memory_listener_unregister(&bcontainer->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 4f76bdd3ca..7a50975f25 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -46,6 +46,7 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + { + VFIOContainer *container = container_of(listener, VFIOContainer, + prereg_listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; + int ret; +@@ -88,9 +89,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. + */ +- if (!container->initialized) { +- if (!container->error) { +- error_setg_errno(&container->error, -ret, ++ if (!bcontainer->initialized) { ++ if (!bcontainer->error) { ++ error_setg_errno(&bcontainer->error, -ret, + "Memory registering failed"); + } + } else { +@@ -445,9 +446,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + + memory_listener_register(&container->prereg_listener, + &address_space_memory); +- if (container->error) { ++ if (bcontainer->error) { + ret = -1; +- error_propagate_prepend(errp, container->error, ++ error_propagate_prepend(errp, bcontainer->error, + "RAM memory listener initialization failed: "); + goto listener_unregister_exit; + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 8a607a4c17..922022cbc6 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -78,11 +78,8 @@ struct VFIOGroup; + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; + int fd; /* /dev/vfio/vfio, empowered by the attached 
groups */ +- MemoryListener listener; + MemoryListener prereg_listener; + unsigned iommu_type; +- Error *error; +- bool initialized; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 8e05b5ac5a..95f8d319e0 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,9 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ MemoryListener listener; ++ Error *error; ++ bool initialized; + unsigned long pgsizes; + unsigned int dma_max_mappings; + bool dirty_pages_supported; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch b/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch new file mode 100644 index 0000000..df483e3 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch @@ -0,0 +1,230 @@ +From 0b3fbb6bf5c5bccec184829ff9454fd637c512b9 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:34 +0800 +Subject: [PATCH 009/101] vfio/container: Move per container device list in + base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [8/67] d546cc25f4424b2d42356765c860fdaf4a3ba652 (eauger1/centos-qemu-kvm) + +VFIO Device is also changed to point to base container instead of +legacy container. + +No functional change intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 3e6015d1117579324b456aa169dfca06da9922cf) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 23 +++++++++++++++-------- + hw/vfio/container.c | 12 ++++++------ + include/hw/vfio/vfio-common.h | 3 +-- + include/hw/vfio/vfio-container-base.h | 1 + + 4 files changed, 23 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b1a875ca93..9415395ed9 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) + + bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- return vbasedev->container->bcontainer.space->as != &address_space_memory; ++ return vbasedev->bcontainer->space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +@@ -179,6 +179,7 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) + + static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +@@ -187,7 +188,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + return false; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + VFIOMigration *migration = vbasedev->migration; + + if (!migration) { +@@ -205,9 +206,10 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + + bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (!vbasedev->dirty_pages_supported) { + return false; + } +@@ -222,13 +224,14 @@ bool 
vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + */ + bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + if (!migration_is_active(migrate_get_current())) { + return false; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + VFIOMigration *migration = vbasedev->migration; + + if (!migration) { +@@ -833,12 +836,13 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, + VFIOContainer *container) + { + VFIOPCIDevice *pcidev; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + Object *owner; + + owner = memory_region_owner(section->mr); + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { + continue; + } +@@ -939,13 +943,14 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_SET | + VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (!vbasedev->dirty_tracking) { + continue; + } +@@ -1036,6 +1041,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) + { + struct vfio_device_feature *feature; + VFIODirtyRanges ranges; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret = 0; + +@@ -1046,7 +1052,7 @@ static int 
vfio_devices_dma_logging_start(VFIOContainer *container) + return -errno; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (vbasedev->dirty_tracking) { + continue; + } +@@ -1139,10 +1145,11 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + ret = vfio_device_dma_logging_report(vbasedev, iova, size, + vbmap->bitmap); + if (ret) { +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 3ab74e2615..63a906de93 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -888,7 +888,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + int groupid = vfio_device_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; + VFIOGroup *group; +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + int ret; + + if (groupid < 0) { +@@ -915,9 +915,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + return ret; + } + +- container = group->container; +- vbasedev->container = container; +- QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next); ++ bcontainer = &group->container->bcontainer; ++ vbasedev->bcontainer = bcontainer; ++ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); + QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + + return ret; +@@ -927,13 +927,13 @@ void vfio_detach_device(VFIODevice *vbasedev) + { + VFIOGroup *group = vbasedev->group; + +- if (!vbasedev->container) { ++ if (!vbasedev->bcontainer) { + return; + } + + QLIST_REMOVE(vbasedev, global_next); + QLIST_REMOVE(vbasedev, container_next); +- vbasedev->container = NULL; ++ vbasedev->bcontainer = NULL; + 
trace_vfio_detach_device(vbasedev->name, group->groupid); + vfio_put_base_device(vbasedev); + vfio_put_group(group); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 60f2785fe0..9740cf9fbc 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -90,7 +90,6 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +- QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; + } VFIOContainer; + +@@ -118,7 +117,7 @@ typedef struct VFIODevice { + QLIST_ENTRY(VFIODevice) container_next; + QLIST_ENTRY(VFIODevice) global_next; + struct VFIOGroup *group; +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + char *sysfsdev; + char *name; + DeviceState *dev; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index f244f003d0..7090962496 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -39,6 +39,7 @@ typedef struct VFIOContainerBase { + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; ++ QLIST_HEAD(, VFIODevice) device_list; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch b/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch new file mode 100644 index 0000000..0db20c2 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch @@ -0,0 +1,242 @@ +From d798939fbbe6c27200c165edd6f3771413821b34 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:36 +0800 +Subject: [PATCH 011/101] vfio/container: Move pgsizes and dma_max_mappings to + base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger 
+RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [10/67] e80696175aba159a17ce9a869535db66682deb08 (eauger1/centos-qemu-kvm) + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 7ab1cb74ffdbf92ef237243b41bde5c7067d5298) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 17 +++++++++-------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 11 +++++------ + hw/vfio/spapr.c | 10 ++++++---- + include/hw/vfio/vfio-common.h | 2 -- + include/hw/vfio/vfio-container-base.h | 2 ++ + 6 files changed, 23 insertions(+), 20 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index cf6618f6ed..1cb53d369e 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -401,6 +401,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + static void vfio_register_ram_discard_listener(VFIOContainer *container, + MemoryRegionSection *section) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl; + +@@ -419,8 +420,8 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + section->mr); + + g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity)); +- g_assert(container->pgsizes && +- vrdl->granularity >= 1ULL << ctz64(container->pgsizes)); ++ g_assert(bcontainer->pgsizes && ++ vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes)); + + ram_discard_listener_init(&vrdl->listener, + vfio_ram_discard_notify_populate, +@@ -441,7 +442,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + * number of sections in the address space we could have over time, + * also consuming DMA mappings. 
+ */ +- if (container->dma_max_mappings) { ++ if (bcontainer->dma_max_mappings) { + unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512; + + #ifdef CONFIG_KVM +@@ -462,11 +463,11 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + + if (vrdl_mappings + max_memslots - vrdl_count > +- container->dma_max_mappings) { ++ bcontainer->dma_max_mappings) { + warn_report("%s: possibly running out of DMA mappings. E.g., try" + " increasing the 'block-size' of virtio-mem devies." + " Maximum possible DMA mappings: %d, Maximum possible" +- " memslots: %d", __func__, container->dma_max_mappings, ++ " memslots: %d", __func__, bcontainer->dma_max_mappings, + max_memslots); + } + } +@@ -626,7 +627,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + iommu_idx); + + ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr, +- container->pgsizes, ++ bcontainer->pgsizes, + &err); + if (ret) { + g_free(giommu); +@@ -675,7 +676,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + llsize = int128_sub(llend, int128_make64(iova)); + + if (memory_region_is_ram_device(section->mr)) { +- hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; ++ hwaddr pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + + if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { + trace_vfio_listener_region_add_no_dma_map( +@@ -777,7 +778,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (memory_region_is_ram_device(section->mr)) { + hwaddr pgmask; + +- pgmask = (1ULL << ctz64(container->pgsizes)) - 1; ++ pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); + } else if (memory_region_has_ram_discard_manager(section->mr)) { + vfio_unregister_ram_discard_listener(container, section); +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 5d654ae172..dcce111349 100644 +--- a/hw/vfio/container-base.c ++++ 
b/hw/vfio/container-base.c +@@ -52,6 +52,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->ops = ops; + bcontainer->space = space; + bcontainer->dirty_pages_supported = false; ++ bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 7bd81eab09..c5a6262882 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -154,7 +154,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { + trace_vfio_legacy_dma_unmap_overflow_workaround(); +- unmap.size -= 1ULL << ctz64(container->pgsizes); ++ unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); + continue; + } + error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); +@@ -559,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->fd = fd; + container->error = NULL; +- container->dma_max_mappings = 0; + container->iova_ranges = NULL; + QLIST_INIT(&container->vrdl_list); + bcontainer = &container->bcontainer; +@@ -589,13 +588,13 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + } + + if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { +- container->pgsizes = info->iova_pgsizes; ++ bcontainer->pgsizes = info->iova_pgsizes; + } else { +- container->pgsizes = qemu_real_host_page_size(); ++ bcontainer->pgsizes = qemu_real_host_page_size(); + } + +- if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { +- container->dma_max_mappings = 65535; ++ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { ++ bcontainer->dma_max_mappings = 65535; + } + + vfio_get_info_iova_range(info, container); +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 83da2f7ec2..4f76bdd3ca 100644 +--- a/hw/vfio/spapr.c ++++ 
b/hw/vfio/spapr.c +@@ -226,6 +226,7 @@ static int vfio_spapr_create_window(VFIOContainer *container, + hwaddr *pgsize) + { + int ret = 0; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask; + unsigned entries, bits_total, bits_per_level, max_levels; +@@ -239,13 +240,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, + if (pagesize > rampagesize) { + pagesize = rampagesize; + } +- pgmask = container->pgsizes & (pagesize | (pagesize - 1)); ++ pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1)); + pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0; + if (!pagesize) { + error_report("Host doesn't support page size 0x%"PRIx64 + ", the supported mask is 0x%lx", + memory_region_iommu_get_min_page_size(iommu_mr), +- container->pgsizes); ++ bcontainer->pgsizes); + return -EINVAL; + } + +@@ -421,6 +422,7 @@ void vfio_container_del_section_window(VFIOContainer *container, + + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; +@@ -461,7 +463,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } + + if (v2) { +- container->pgsizes = info.ddw.pgsizes; ++ bcontainer->pgsizes = info.ddw.pgsizes; + /* + * There is a default window in just created container. 
+ * To make region_add/del simpler, we better remove this +@@ -476,7 +478,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } + } else { + /* The default table uses 4K pages */ +- container->pgsizes = 0x1000; ++ bcontainer->pgsizes = 0x1000; + vfio_host_win_add(container, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index bc67e1316c..d3dc2f9dcb 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -85,8 +85,6 @@ typedef struct VFIOContainer { + bool initialized; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; +- unsigned long pgsizes; +- unsigned int dma_max_mappings; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 7090962496..85ec7e1a56 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,8 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ unsigned long pgsizes; ++ unsigned int dma_max_mappings; + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch new file mode 100644 index 0000000..edd4538 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch @@ -0,0 +1,265 @@ +From 3ba43cbc5b096feed6272e070cf152d5fc74df01 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:32 +0800 +Subject: [PATCH 007/101] vfio/container: Move space field to base container +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [6/67] b0aa17d9ec4588bd64373452a30306e826234d0b (eauger1/centos-qemu-kvm) + +Move the space field to the base object. Also the VFIOAddressSpace +now contains a list of base containers. + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit e5597063386a0c76308ad16da31726d23f489945) +Signed-off-by: Eric Auger +--- + hw/ppc/spapr_pci_vfio.c | 10 +++++----- + hw/vfio/common.c | 4 ++-- + hw/vfio/container-base.c | 6 +++++- + hw/vfio/container.c | 18 ++++++++---------- + include/hw/vfio/vfio-common.h | 8 -------- + include/hw/vfio/vfio-container-base.h | 9 +++++++++ + 6 files changed, 29 insertions(+), 26 deletions(-) + +diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c +index f283f7e38d..d1d07bec46 100644 +--- a/hw/ppc/spapr_pci_vfio.c ++++ b/hw/ppc/spapr_pci_vfio.c +@@ -84,27 +84,27 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) + static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) + { + VFIOAddressSpace *space = vfio_get_address_space(as); +- VFIOContainer *container = NULL; ++ VFIOContainerBase *bcontainer = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + +- container = QLIST_FIRST(&space->containers); ++ bcontainer = QLIST_FIRST(&space->containers); + +- if (QLIST_NEXT(container, next)) { ++ if (QLIST_NEXT(bcontainer, next)) { + /* + * We don't yet have logic to synchronize EEH state across + * multiple containers + */ +- container = NULL; ++ bcontainer = NULL; + goto out; + } + + out: + vfio_put_address_space(space); +- return container; ++ return container_of(bcontainer, 
VFIOContainer, bcontainer); + } + + static bool vfio_eeh_as_ok(AddressSpace *as) +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 43580bcc43..1d8202537e 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) + + bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- return vbasedev->container->space->as != &address_space_memory; ++ return vbasedev->container->bcontainer.space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +@@ -922,7 +922,7 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + dirty.container = container; + + memory_listener_register(&dirty.listener, +- container->space->as); ++ container->bcontainer.space->as); + + *ranges = dirty.ranges; + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 20bcb9669a..3933391e0d 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,9 +31,11 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } + +-void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) ++void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, ++ const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; ++ bcontainer->space = space; + QLIST_INIT(&bcontainer->giommu_list); + } + +@@ -41,6 +43,8 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + { + VFIOGuestIOMMU *giommu, *tmp; + ++ QLIST_REMOVE(bcontainer, next); ++ + QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier( + MEMORY_REGION(giommu->iommu_mr), &giommu->n); +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 133d3c8f5c..f12fcb6fe1 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -514,7 +514,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + * details once we know 
which type of IOMMU we are using. + */ + +- QLIST_FOREACH(container, &space->containers, next) { ++ QLIST_FOREACH(bcontainer, &space->containers, next) { ++ container = container_of(bcontainer, VFIOContainer, bcontainer); + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { +@@ -550,7 +551,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + } + + container = g_malloc0(sizeof(*container)); +- container->space = space; + container->fd = fd; + container->error = NULL; + container->dirty_pages_supported = false; +@@ -558,7 +558,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->iova_ranges = NULL; + QLIST_INIT(&container->vrdl_list); + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, &vfio_legacy_ops); ++ vfio_container_init(bcontainer, space, &vfio_legacy_ops); + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -613,14 +613,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + vfio_kvm_device_add_group(group); + + QLIST_INIT(&container->group_list); +- QLIST_INSERT_HEAD(&space->containers, container, next); ++ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + + container->listener = vfio_memory_listener; + +- memory_listener_register(&container->listener, container->space->as); ++ memory_listener_register(&container->listener, bcontainer->space->as); + + if (container->error) { + ret = -1; +@@ -634,7 +634,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + return 0; + listener_release_exit: + QLIST_REMOVE(group, container_next); +- QLIST_REMOVE(container, next); ++ QLIST_REMOVE(bcontainer, next); + vfio_kvm_device_del_group(group); + memory_listener_unregister(&container->listener); + if (container->iommu_type == 
VFIO_SPAPR_TCE_v2_IOMMU || +@@ -684,9 +684,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + } + + if (QLIST_EMPTY(&container->group_list)) { +- VFIOAddressSpace *space = container->space; +- +- QLIST_REMOVE(container, next); ++ VFIOAddressSpace *space = bcontainer->space; + + vfio_container_destroy(bcontainer); + +@@ -707,7 +705,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == groupid) { + /* Found it. Now is it already in the right context? */ +- if (group->container->space->as == as) { ++ if (group->container->bcontainer.space->as == as) { + return group; + } else { + error_setg(errp, "group %d used in multiple address spaces", +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 6be082b8f2..bd4de6cb3a 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -73,17 +73,10 @@ typedef struct VFIOMigration { + bool initial_data_sent; + } VFIOMigration; + +-typedef struct VFIOAddressSpace { +- AddressSpace *as; +- QLIST_HEAD(, VFIOContainer) containers; +- QLIST_ENTRY(VFIOAddressSpace) list; +-} VFIOAddressSpace; +- + struct VFIOGroup; + + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; +- VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener listener; + MemoryListener prereg_listener; +@@ -98,7 +91,6 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +- QLIST_ENTRY(VFIOContainer) next; + QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; + } VFIOContainer; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index a11aec5755..c7cc6ec9c5 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -24,12 +24,20 @@ 
typedef struct { + hwaddr pages; + } VFIOBitmap; + ++typedef struct VFIOAddressSpace { ++ AddressSpace *as; ++ QLIST_HEAD(, VFIOContainerBase) containers; ++ QLIST_ENTRY(VFIOAddressSpace) list; ++} VFIOAddressSpace; ++ + /* + * This is the base object for vfio container backends + */ + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; ++ VFIOAddressSpace *space; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; ++ QLIST_ENTRY(VFIOContainerBase) next; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +@@ -48,6 +56,7 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + IOMMUTLBEntry *iotlb); + + void vfio_container_init(VFIOContainerBase *bcontainer, ++ VFIOAddressSpace *space, + const VFIOIOMMUOps *ops); + void vfio_container_destroy(VFIOContainerBase *bcontainer); + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch new file mode 100644 index 0000000..5e31d07 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch @@ -0,0 +1,255 @@ +From aadd055dcc06cb964ebfd2868b7e9b207d62ae0e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:37 +0800 +Subject: [PATCH 012/101] vfio/container: Move vrdl_list to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [11/67] 42da5389e39291839259f0e4c020c7461b7225cc (eauger1/centos-qemu-kvm) + +No functional change intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit dc74a4b0056c0c803d46612a2319294921097974) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 38 +++++++++++++-------------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 1 - + include/hw/vfio/vfio-common.h | 11 -------- + include/hw/vfio/vfio-container-base.h | 11 ++++++++ + 5 files changed, 31 insertions(+), 31 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 1cb53d369e..f15665789f 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -351,13 +351,13 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + { + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); ++ VFIOContainerBase *bcontainer = vrdl->bcontainer; + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; + int ret; + + /* Unmap with a single call. */ +- ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, +- iova, size , NULL); ++ ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL); + if (ret) { + error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, + strerror(-ret)); +@@ -369,6 +369,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + { + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); ++ VFIOContainerBase *bcontainer = vrdl->bcontainer; + const hwaddr end = section->offset_within_region + + int128_get64(section->size); + hwaddr start, next, iova; +@@ -387,8 +388,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + section->offset_within_address_space; + vaddr = memory_region_get_ram_ptr(section->mr) + start; + +- ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, +- next - start, vaddr, section->readonly); ++ ret = vfio_container_dma_map(bcontainer, iova, next - start, ++ vaddr, section->readonly); + if 
(ret) { + /* Rollback */ + vfio_ram_discard_notify_discard(rdl, section); +@@ -398,10 +399,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + return 0; + } + +-static void vfio_register_ram_discard_listener(VFIOContainer *container, ++static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl; + +@@ -412,7 +412,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); + + vrdl = g_new0(VFIORamDiscardListener, 1); +- vrdl->container = container; ++ vrdl->bcontainer = bcontainer; + vrdl->mr = section->mr; + vrdl->offset_within_address_space = section->offset_within_address_space; + vrdl->size = int128_get64(section->size); +@@ -427,7 +427,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + vfio_ram_discard_notify_populate, + vfio_ram_discard_notify_discard, true); + ram_discard_manager_register_listener(rdm, &vrdl->listener, section); +- QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next); ++ QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next); + + /* + * Sanity-check if we have a theoretically problematic setup where we could +@@ -451,7 +451,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + #endif + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + hwaddr start, end; + + start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space, +@@ -473,13 +473,13 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + } + +-static void vfio_unregister_ram_discard_listener(VFIOContainer *container, ++static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, + 
MemoryRegionSection *section) + { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { +@@ -663,7 +663,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + * about changes. + */ + if (memory_region_has_ram_discard_manager(section->mr)) { +- vfio_register_ram_discard_listener(container, section); ++ vfio_register_ram_discard_listener(bcontainer, section); + return; + } + +@@ -781,7 +781,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); + } else if (memory_region_has_ram_discard_manager(section->mr)) { +- vfio_unregister_ram_discard_listener(container, section); ++ vfio_unregister_ram_discard_listener(bcontainer, section); + /* Unregistering will trigger an unmap. */ + try_unmap = false; + } +@@ -1260,17 +1260,17 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, + * Sync the whole mapped region (spanning multiple individual mappings) + * in one go. 
+ */ +- return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, +- ram_addr); ++ return vfio_get_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr); + } + +-static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, +- MemoryRegionSection *section) ++static int ++vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) + { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { +@@ -1324,7 +1324,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + } + return 0; + } else if (memory_region_has_ram_discard_manager(section->mr)) { +- return vfio_sync_ram_discard_listener_dirty_bitmap(container, section); ++ return vfio_sync_ram_discard_listener_dirty_bitmap(bcontainer, section); + } + + ram_addr = memory_region_get_ram_addr(section->mr) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index dcce111349..584eee4ba1 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); ++ QLIST_INIT(&bcontainer->vrdl_list); + } + + void vfio_container_destroy(VFIOContainerBase *bcontainer) +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index c5a6262882..6ba2e2f8c4 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -560,7 +560,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->fd = fd; + container->error = NULL; + container->iova_ranges = NULL; +- QLIST_INIT(&container->vrdl_list); + 
bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index d3dc2f9dcb..8a607a4c17 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -87,20 +87,9 @@ typedef struct VFIOContainer { + uint64_t max_dirty_bitmap_size; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; +- QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + GList *iova_ranges; + } VFIOContainer; + +-typedef struct VFIORamDiscardListener { +- VFIOContainer *container; +- MemoryRegion *mr; +- hwaddr offset_within_address_space; +- hwaddr size; +- uint64_t granularity; +- RamDiscardListener listener; +- QLIST_ENTRY(VFIORamDiscardListener) next; +-} VFIORamDiscardListener; +- + typedef struct VFIOHostDMAWindow { + hwaddr min_iova; + hwaddr max_iova; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 85ec7e1a56..8e05b5ac5a 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -40,6 +40,7 @@ typedef struct VFIOContainerBase { + unsigned int dma_max_mappings; + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; ++ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_ENTRY(VFIOContainerBase) next; + QLIST_HEAD(, VFIODevice) device_list; + } VFIOContainerBase; +@@ -52,6 +53,16 @@ typedef struct VFIOGuestIOMMU { + QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; + } VFIOGuestIOMMU; + ++typedef struct VFIORamDiscardListener { ++ VFIOContainerBase *bcontainer; ++ MemoryRegion *mr; ++ hwaddr offset_within_address_space; ++ hwaddr size; ++ uint64_t granularity; ++ RamDiscardListener listener; ++ QLIST_ENTRY(VFIORamDiscardListener) next; ++} VFIORamDiscardListener; ++ + int vfio_container_dma_map(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.39.3 + diff --git 
a/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch b/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch new file mode 100644 index 0000000..f68be0b --- /dev/null +++ b/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch @@ -0,0 +1,66 @@ +From edfc1ee2a1854d180ffad92e70212535a2ca668c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 21 Dec 2023 10:45:17 +0800 +Subject: [PATCH 062/101] vfio/container: Rename vfio_init_container to + vfio_set_iommu +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [61/67] 5e7f956379b54fe6fa7e078ec17e71325aa109af (eauger1/centos-qemu-kvm) + +vfio_container_init() and vfio_init_container() names are confusing +especially when we see vfio_init_container() calls vfio_container_init(). + +vfio_container_init() operates on base container which is consistent +with all routines handling 'VFIOContainerBase *' ops. + +vfio_init_container() operates on legacy container and setup IOMMU +context with ioctl(VFIO_SET_IOMMU). + +So choose to rename vfio_init_container to vfio_set_iommu to avoid +the confusion. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +(cherry picked from commit 9f734a117cbf63b03577b46c8cad8ad88ec6dced) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 8d334f52f2..bd25b9fbad 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -392,8 +392,8 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) + return VFIO_IOMMU_CLASS(klass); + } + +-static int vfio_init_container(VFIOContainer *container, int group_fd, +- VFIOAddressSpace *space, Error **errp) ++static int vfio_set_iommu(VFIOContainer *container, int group_fd, ++ VFIOAddressSpace *space, Error **errp) + { + int iommu_type, ret; + const VFIOIOMMUClass *vioc; +@@ -616,7 +616,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->fd = fd; + bcontainer = &container->bcontainer; + +- ret = vfio_init_container(container, group->fd, space, errp); ++ ret = vfio_set_iommu(container, group->fd, space, errp); + if (ret) { + goto free_container_exit; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch b/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch new file mode 100644 index 0000000..77df179 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch @@ -0,0 +1,59 @@ +From 8d3857c7877da58ed0c6b62cf2714c4127350522 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 20 Dec 2023 14:53:02 +0100 +Subject: [PATCH 059/101] vfio/container: Replace basename with + g_path_get_basename +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott 
+RH-Commit: [58/67] 56a90f23dadc89271b1fff014fc64ade87c1a4cb (eauger1/centos-qemu-kvm) + +g_path_get_basename() is a portable utility function that has the +advantage of not modifying the string argument. It also fixes a compile +breakage with the Musl C library reported in [1]. + +[1] https://lore.kernel.org/all/20231212010228.2701544-1-raj.khem@gmail.com/ + +Reported-by: Khem Raj +Reviewed-by: Eric Auger +Reviewed-by: Zhao Liu +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +(cherry picked from commit 213ae3ffda463c0503e39e0cf827511b5298c314) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 688cf23bab..8d334f52f2 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -869,7 +869,8 @@ static void vfio_put_base_device(VFIODevice *vbasedev) + + static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) + { +- char *tmp, group_path[PATH_MAX], *group_name; ++ char *tmp, group_path[PATH_MAX]; ++ g_autofree char *group_name = NULL; + int ret, groupid; + ssize_t len; + +@@ -885,7 +886,7 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) + + group_path[len] = 0; + +- group_name = basename(group_path); ++ group_name = g_path_get_basename(group_path); + if (sscanf(group_name, "%d", &groupid) != 1) { + error_setg_errno(errp, errno, "failed to read %s", group_path); + return -errno; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch b/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch new file mode 100644 index 0000000..5442688 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch @@ -0,0 +1,235 @@ +From a2c8aa64b1b21a3e1d4cf2a4fe7d84dc32f69284 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:33 +0800 +Subject: [PATCH 008/101] vfio/container: Switch to IOMMU BE + 
set_dirty_page_tracking/query_dirty_bitmap API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [7/67] 88368809c7990e1d9b01406e48694fe3e3fb1397 (eauger1/centos-qemu-kvm) + +dirty_pages_supported field is also moved to the base container + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit bb424490edcef73d07f200d53f69415b203d81df) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 12 ++++++++---- + hw/vfio/container-base.c | 16 ++++++++++++++++ + hw/vfio/container.c | 21 ++++++++++++++------- + include/hw/vfio/vfio-common.h | 6 ------ + include/hw/vfio/vfio-container-base.h | 6 ++++++ + 5 files changed, 44 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 1d8202537e..b1a875ca93 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1079,7 +1079,8 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + if (vfio_devices_all_device_dirty_tracking(container)) { + ret = vfio_devices_dma_logging_start(container); + } else { +- ret = vfio_set_dirty_page_tracking(container, true); ++ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, ++ true); + } + + if (ret) { +@@ -1097,7 +1098,8 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) + if (vfio_devices_all_device_dirty_tracking(container)) { + vfio_devices_dma_logging_stop(container); + } else { +- ret = vfio_set_dirty_page_tracking(container, false); ++ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, ++ false); + } + + if (ret) { +@@ -1165,7 +1167,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t 
iova, + VFIOBitmap vbmap; + int ret; + +- if (!container->dirty_pages_supported && !all_device_dirty_tracking) { ++ if (!container->bcontainer.dirty_pages_supported && ++ !all_device_dirty_tracking) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); +@@ -1180,7 +1183,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + if (all_device_dirty_tracking) { + ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); + } else { +- ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size); ++ ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, ++ iova, size); + } + + if (ret) { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 3933391e0d..5d654ae172 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,11 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } + ++int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start) ++{ ++ g_assert(bcontainer->ops->set_dirty_page_tracking); ++ return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); ++} ++ ++int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size) ++{ ++ g_assert(bcontainer->ops->query_dirty_bitmap); ++ return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size); ++} ++ + void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; + bcontainer->space = space; ++ bcontainer->dirty_pages_supported = false; + QLIST_INIT(&bcontainer->giommu_list); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index f12fcb6fe1..3ab74e2615 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -131,7 +131,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase 
*bcontainer, hwaddr iova, + + if (iotlb && vfio_devices_all_running_and_mig_active(container)) { + if (!vfio_devices_all_device_dirty_tracking(container) && +- container->dirty_pages_supported) { ++ container->bcontainer.dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } + +@@ -205,14 +205,17 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, + return -errno; + } + +-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) ++static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), + }; + +- if (!container->dirty_pages_supported) { ++ if (!bcontainer->dirty_pages_supported) { + return 0; + } + +@@ -232,9 +235,12 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) + return ret; + } + +-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, +- hwaddr iova, hwaddr size) ++static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; + int ret; +@@ -461,7 +467,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + * qemu_real_host_page_size to mark those dirty. 
+ */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { +- container->dirty_pages_supported = true; ++ container->bcontainer.dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; + } +@@ -553,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->fd = fd; + container->error = NULL; +- container->dirty_pages_supported = false; + container->dma_max_mappings = 0; + container->iova_ranges = NULL; + QLIST_INIT(&container->vrdl_list); +@@ -937,4 +942,6 @@ void vfio_detach_device(VFIODevice *vbasedev) + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, ++ .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, ++ .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, + }; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index bd4de6cb3a..60f2785fe0 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -83,7 +83,6 @@ typedef struct VFIOContainer { + unsigned iommu_type; + Error *error; + bool initialized; +- bool dirty_pages_supported; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; +@@ -190,11 +189,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); + +-/* container->fd */ +-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); +-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, +- hwaddr iova, hwaddr size); +- + /* SPAPR specific */ + int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 
c7cc6ec9c5..f244f003d0 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,7 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; + } VFIOContainerBase; +@@ -54,6 +55,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, + int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); ++int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start); ++int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size); + + void vfio_container_init(VFIOContainerBase *bcontainer, + VFIOAddressSpace *space, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch b/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch new file mode 100644 index 0000000..cfb5eb1 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch @@ -0,0 +1,303 @@ +From 00daef8e3f4f64b1401b2e8945c256d27fbfa960 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:29 +0800 +Subject: [PATCH 004/101] vfio/container: Switch to dma_map|unmap API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [3/67] 9a20e2f2b277be65463f145df3309271493be6ac (eauger1/centos-qemu-kvm) + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit b08501a999e2448f500a46d68da503be55186b04) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 45 +++++++++++++++------------ + hw/vfio/container-base.c | 32 +++++++++++++++++++ + hw/vfio/container.c | 22 ++++++++----- + hw/vfio/meson.build | 1 + + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 4 --- + include/hw/vfio/vfio-container-base.h | 7 +++++ + 7 files changed, 81 insertions(+), 32 deletions(-) + create mode 100644 hw/vfio/container-base.c + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index e70fdf5e0c..e610771888 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +- VFIOContainer *container = giommu->container; ++ VFIOContainerBase *bcontainer = &giommu->container->bcontainer; + hwaddr iova = iotlb->iova + giommu->iommu_offset; + void *vaddr; + int ret; +@@ -322,21 +322,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + * of vaddr will always be there, even if the memory object is + * destroyed and its backing memory munmap-ed. 
+ */ +- ret = vfio_dma_map(container, iova, +- iotlb->addr_mask + 1, vaddr, +- read_only); ++ ret = vfio_container_dma_map(bcontainer, iova, ++ iotlb->addr_mask + 1, vaddr, ++ read_only); + if (ret) { +- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", +- container, iova, ++ bcontainer, iova, + iotlb->addr_mask + 1, vaddr, ret, strerror(-ret)); + } + } else { +- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb); ++ ret = vfio_container_dma_unmap(bcontainer, iova, ++ iotlb->addr_mask + 1, iotlb); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, ++ bcontainer, iova, + iotlb->addr_mask + 1, ret, strerror(-ret)); + vfio_set_migration_error(ret); + } +@@ -355,9 +356,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + int ret; + + /* Unmap with a single call. 
*/ +- ret = vfio_dma_unmap(vrdl->container, iova, size , NULL); ++ ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, ++ iova, size , NULL); + if (ret) { +- error_report("%s: vfio_dma_unmap() failed: %s", __func__, ++ error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, + strerror(-ret)); + } + } +@@ -385,8 +387,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + section->offset_within_address_space; + vaddr = memory_region_get_ram_ptr(section->mr) + start; + +- ret = vfio_dma_map(vrdl->container, iova, next - start, +- vaddr, section->readonly); ++ ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, ++ next - start, vaddr, section->readonly); + if (ret) { + /* Rollback */ + vfio_ram_discard_notify_discard(rdl, section); +@@ -684,10 +686,11 @@ static void vfio_listener_region_add(MemoryListener *listener, + } + } + +- ret = vfio_dma_map(container, iova, int128_get64(llsize), +- vaddr, section->readonly); ++ ret = vfio_container_dma_map(&container->bcontainer, ++ iova, int128_get64(llsize), vaddr, ++ section->readonly); + if (ret) { +- error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", " ++ error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", + container, iova, int128_get64(llsize), vaddr, ret, + strerror(-ret)); +@@ -784,18 +787,20 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (int128_eq(llsize, int128_2_64())) { + /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ + llsize = int128_rshift(llsize, 1); +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); ++ ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ int128_get64(llsize), NULL); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", + container, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + iova += int128_get64(llsize); + } +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); ++ ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ int128_get64(llsize), NULL); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", + container, iova, int128_get64(llsize), ret, + strerror(-ret)); +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +new file mode 100644 +index 0000000000..55d3a35fa4 +--- /dev/null ++++ b/hw/vfio/container-base.c +@@ -0,0 +1,32 @@ ++/* ++ * VFIO BASE CONTAINER ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "qemu/error-report.h" ++#include "hw/vfio/vfio-container-base.h" ++ ++int vfio_container_dma_map(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly) ++{ ++ g_assert(bcontainer->ops->dma_map); ++ return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly); ++} ++ ++int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) ++{ ++ g_assert(bcontainer->ops->dma_unmap); ++ return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 4bc43ddfa4..c04df26323 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -115,9 +115,11 @@ unmap_exit: + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb) ++static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, IOMMUTLBEntry *iotlb) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, +@@ -151,7 +153,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + */ + if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { +- trace_vfio_dma_unmap_overflow_workaround(); ++ trace_vfio_legacy_dma_unmap_overflow_workaround(); + unmap.size -= 1ULL << ctz64(container->pgsizes); + continue; + } +@@ -170,9 +172,11 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + return 0; + } + +-int vfio_dma_map(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, void *vaddr, bool readonly) ++static int 
vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, +@@ -191,7 +195,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, + * the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || +- (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && ++ (errno == EBUSY && ++ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } +@@ -937,4 +942,7 @@ void vfio_detach_device(VFIODevice *vbasedev) + vfio_put_group(group); + } + +-const VFIOIOMMUOps vfio_legacy_ops; ++const VFIOIOMMUOps vfio_legacy_ops = { ++ .dma_map = vfio_legacy_dma_map, ++ .dma_unmap = vfio_legacy_dma_unmap, ++}; +diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build +index 2a6912c940..eb6ce6229d 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -2,6 +2,7 @@ vfio_ss = ss.source_set() + vfio_ss.add(files( + 'helpers.c', + 'common.c', ++ 'container-base.c', + 'container.c', + 'spapr.c', + 'migration.c', +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 0eb2387cf2..9f7fedee98 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -116,7 +116,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re + vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" +-vfio_dma_unmap_overflow_workaround(void) "" ++vfio_legacy_dma_unmap_overflow_workaround(void) "" + vfio_get_dirty_bitmap(int fd, 
uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 + vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 678161f207..24a26345e5 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -208,10 +208,6 @@ void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); + + /* container->fd */ +-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb); +-int vfio_dma_map(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, void *vaddr, bool readonly); + int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); + int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 1d6daaea5d..56b033f59f 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -31,6 +31,13 @@ typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + } VFIOContainerBase; + ++int vfio_container_dma_map(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly); ++int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb); ++ + struct VFIOIOMMUOps { + /* basic feature */ + int (*dma_map)(VFIOContainerBase *bcontainer, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch b/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch new file mode 100644 index 0000000..52e3d87 --- /dev/null +++ 
b/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch @@ -0,0 +1,115 @@ +From 49435d4d592bc890f56b69c2290f890c87b5a103 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:05 +0800 +Subject: [PATCH 026/101] vfio/iommufd: Add support for iova_ranges and pgsizes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [25/67] 578af0547d97276ccd4936b574c12118fc70d468 (eauger1/centos-qemu-kvm) + +Some vIOMMU such as virtio-iommu use IOVA ranges from host side to +setup reserved ranges for passthrough device, so that guest will not +use an IOVA range beyond host support. + +Use an uAPI of IOMMUFD to get IOVA ranges of host side and pass to +vIOMMU just like the legacy backend, if this fails, fallback to +64bit IOVA range. + +Also use out_iova_alignment returned from uAPI as pgsizes instead of +qemu_real_host_page_size() as a fallback. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 714e9affa8ae1d84007c8afde7bb10fef9cb883d) +Signed-off-by: Eric Auger +--- + hw/vfio/iommufd.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 55 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 6d31aeac7b..01b448e840 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -261,6 +261,53 @@ static int iommufd_cdev_ram_block_discard_disable(bool state) + return ram_block_uncoordinated_discard_disable(state); + } + ++static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container, ++ uint32_t ioas_id, Error **errp) ++{ ++ VFIOContainerBase *bcontainer = &container->bcontainer; ++ struct iommu_ioas_iova_ranges *info; ++ struct iommu_iova_range *iova_ranges; ++ int ret, sz, fd = container->be->fd; ++ ++ info = g_malloc0(sizeof(*info)); ++ info->size = sizeof(*info); ++ info->ioas_id = ioas_id; ++ ++ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); ++ if (ret && errno != EMSGSIZE) { ++ goto error; ++ } ++ ++ sz = info->num_iovas * sizeof(struct iommu_iova_range); ++ info = g_realloc(info, sizeof(*info) + sz); ++ info->allowed_iovas = (uintptr_t)(info + 1); ++ ++ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); ++ if (ret) { ++ goto error; ++ } ++ ++ iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas; ++ ++ for (int i = 0; i < info->num_iovas; i++) { ++ Range *range = g_new(Range, 1); ++ ++ range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last); ++ bcontainer->iova_ranges = ++ range_list_insert(bcontainer->iova_ranges, range); ++ } ++ bcontainer->pgsizes = info->out_iova_alignment; ++ ++ g_free(info); ++ return 0; ++ ++error: ++ ret = -errno; ++ g_free(info); ++ error_setg_errno(errp, errno, "Cannot get IOVA ranges"); ++ return ret; ++} ++ + static int 
iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) + { +@@ -335,7 +382,14 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + goto err_discard_disable; + } + +- bcontainer->pgsizes = qemu_real_host_page_size(); ++ ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err); ++ if (ret) { ++ error_append_hint(&err, ++ "Fallback to default 64bit IOVA range and 4K page size\n"); ++ warn_report_err(err); ++ err = NULL; ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ } + + bcontainer->listener = vfio_memory_listener; + memory_listener_register(&bcontainer->listener, bcontainer->space->as); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch b/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch new file mode 100644 index 0000000..48db196 --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch @@ -0,0 +1,215 @@ +From e94700896dd8fcea149d9719eccde6f485440be2 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:08 +0800 +Subject: [PATCH 029/101] vfio/iommufd: Enable pci hot reset through iommufd + cdev interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [28/67] ca1ae970138ee4a6f4b3b49817e775f3159f4c97 (eauger1/centos-qemu-kvm) + +Implement the newly introduced pci_hot_reset callback named +iommufd_cdev_pci_hot_reset to do iommufd specific check and +reset operation. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 96d6f85ff012abd7aaa35b1a2bc48b8640c898d9) +Signed-off-by: Eric Auger +--- + hw/vfio/iommufd.c | 150 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 1 + + 2 files changed, 151 insertions(+) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 01b448e840..6e53e013ef 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" + #include "qemu/cutils.h" + #include "qemu/chardev_open.h" ++#include "pci.h" + + static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +@@ -468,9 +469,158 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) + close(vbasedev->fd); + } + ++static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) ++{ ++ VFIODevice *vbasedev_iter; ++ ++ QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { ++ if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { ++ continue; ++ } ++ if (devid == vbasedev_iter->devid) { ++ return vbasedev_iter; ++ } ++ } ++ return NULL; ++} ++ ++static VFIOPCIDevice * ++iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev, ++ VFIODevice *reset_dev) ++{ ++ VFIODevice *vbasedev_tmp; ++ ++ if (dep_dev->devid == reset_dev->devid || ++ dep_dev->devid == VFIO_PCI_DEVID_OWNED) { ++ return NULL; ++ } ++ ++ vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid); ++ if (!vbasedev_tmp || !vbasedev_tmp->dev->realized || ++ vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) { ++ return NULL; ++ } ++ ++ return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev); ++} ++ ++static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct 
vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int ret, i; ++ bool multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { ++ goto out_single; ++ } ++ ++ assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID); ++ ++ devices = &info->devices[0]; ++ ++ if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) { ++ if (!vdev->has_pm_reset) { ++ for (i = 0; i < info->count; i++) { ++ if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) { ++ error_report("vfio: Cannot reset device %s, " ++ "depends on device %04x:%02x:%02x.%x " ++ "which is not owned.", ++ vdev->vbasedev.name, devices[i].segment, ++ devices[i].bus, PCI_SLOT(devices[i].devfn), ++ PCI_FUNC(devices[i].devfn)); ++ } ++ } ++ } ++ ret = -EPERM; ++ goto out_single; ++ } ++ ++ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); ++ ++ for (i = 0; i < info->count; i++) { ++ VFIOPCIDevice *tmp; ++ ++ trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment, ++ devices[i].bus, ++ PCI_SLOT(devices[i].devfn), ++ PCI_FUNC(devices[i].devfn), ++ devices[i].devid); ++ ++ /* ++ * If a VFIO cdev device is resettable, all the dependent devices ++ * are either bound to same iommufd or within same iommu_groups as ++ * one of the iommufd bound devices. 
++ */ ++ assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED); ++ ++ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); ++ if (!tmp) { ++ continue; ++ } ++ ++ if (single) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ vfio_pci_pre_reset(tmp); ++ tmp->vbasedev.needs_reset = false; ++ multi = true; ++ } ++ ++ if (!single && !multi) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ ++ /* Use zero length array for hot reset with iommufd backend */ ++ reset = g_malloc0(sizeof(*reset)); ++ reset->argsz = sizeof(*reset); ++ ++ /* Bus reset! */ ++ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); ++ g_free(reset); ++ if (ret) { ++ ret = -errno; ++ } ++ ++ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, ++ ret ? strerror(errno) : "Success"); ++ ++ /* Re-enable INTx on affected devices */ ++ for (i = 0; i < info->count; i++) { ++ VFIOPCIDevice *tmp; ++ ++ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); ++ if (!tmp) { ++ continue; ++ } ++ vfio_pci_post_reset(tmp); ++ } ++out_single: ++ if (!single) { ++ vfio_pci_post_reset(vdev); ++ } ++ g_free(info); ++ ++ return ret; ++} ++ + const VFIOIOMMUOps vfio_iommufd_ops = { + .dma_map = iommufd_cdev_map, + .dma_unmap = iommufd_cdev_unmap, + .attach_device = iommufd_cdev_attach, + .detach_device = iommufd_cdev_detach, ++ .pci_hot_reset = iommufd_cdev_pci_hot_reset, + }; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 3340c93af0..8fdde54456 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Succ + iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" + iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" + iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" 
++iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch b/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch new file mode 100644 index 0000000..f00cbcd --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch @@ -0,0 +1,561 @@ +From f018d0b686406256c2b5e823e4227316ee1394e9 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Tue, 21 Nov 2023 16:44:03 +0800 +Subject: [PATCH 024/101] vfio/iommufd: Implement the iommufd backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [23/67] d11046654117a690542a1e2b48b9d1994f778b2d (eauger1/centos-qemu-kvm) + +The iommufd backend is implemented based on the new /dev/iommu user API. +This backend obviously depends on CONFIG_IOMMUFD. + +So far, the iommufd backend doesn't support dirty page sync yet. 
+ +Co-authored-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 5ee3dc7af7859e7b8aa34c10c21778101c15e812) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 6 + + hw/vfio/iommufd.c | 422 ++++++++++++++++++++++++++++++++++ + hw/vfio/meson.build | 3 + + hw/vfio/trace-events | 10 + + include/hw/vfio/vfio-common.h | 11 + + 5 files changed, 452 insertions(+) + create mode 100644 hw/vfio/iommufd.c + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 934f4f5446..6569732b7a 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #ifdef CONFIG_KVM + #include +@@ -1503,6 +1504,11 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + { + const VFIOIOMMUOps *ops = &vfio_legacy_ops; + ++#ifdef CONFIG_IOMMUFD ++ if (vbasedev->iommufd) { ++ ops = &vfio_iommufd_ops; ++ } ++#endif + return ops->attach_device(name, vbasedev, as, errp); + } + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +new file mode 100644 +index 0000000000..6d31aeac7b +--- /dev/null ++++ b/hw/vfio/iommufd.c +@@ -0,0 +1,422 @@ ++/* ++ * iommufd container backend ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include ++#include ++#include ++ ++#include "hw/vfio/vfio-common.h" ++#include "qemu/error-report.h" ++#include "trace.h" ++#include "qapi/error.h" ++#include "sysemu/iommufd.h" ++#include "hw/qdev-core.h" ++#include "sysemu/reset.h" ++#include "qemu/cutils.h" ++#include "qemu/chardev_open.h" ++ ++static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly) ++{ ++ VFIOIOMMUFDContainer *container = ++ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ ++ return iommufd_backend_map_dma(container->be, ++ container->ioas_id, ++ iova, size, vaddr, readonly); ++} ++ ++static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) ++{ ++ VFIOIOMMUFDContainer *container = ++ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ ++ /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ ++ return iommufd_backend_unmap_dma(container->be, ++ container->ioas_id, iova, size); ++} ++ ++static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp) ++{ ++ return vfio_kvm_device_add_fd(vbasedev->fd, errp); ++} ++ ++static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev) ++{ ++ Error *err = NULL; ++ ++ if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) { ++ error_report_err(err); ++ } ++} ++ ++static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) ++{ ++ IOMMUFDBackend *iommufd = vbasedev->iommufd; ++ struct vfio_device_bind_iommufd bind = { ++ .argsz = sizeof(bind), ++ .flags = 0, ++ }; ++ int ret; ++ ++ ret = iommufd_backend_connect(iommufd, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ /* ++ * Add device to kvm-vfio to be prepared for the tracking ++ * in KVM. Especially for some emulated devices, it requires ++ * to have kvm information in the device open. 
++ */ ++ ret = iommufd_cdev_kvm_device_add(vbasedev, errp); ++ if (ret) { ++ goto err_kvm_device_add; ++ } ++ ++ /* Bind device to iommufd */ ++ bind.iommufd = iommufd->fd; ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind); ++ if (ret) { ++ error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d", ++ vbasedev->fd, bind.iommufd); ++ goto err_bind; ++ } ++ ++ vbasedev->devid = bind.out_devid; ++ trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, ++ vbasedev->fd, vbasedev->devid); ++ return ret; ++err_bind: ++ iommufd_cdev_kvm_device_del(vbasedev); ++err_kvm_device_add: ++ iommufd_backend_disconnect(iommufd); ++ return ret; ++} ++ ++static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) ++{ ++ /* Unbind is automatically conducted when device fd is closed */ ++ iommufd_cdev_kvm_device_del(vbasedev); ++ iommufd_backend_disconnect(vbasedev->iommufd); ++} ++ ++static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) ++{ ++ long int ret = -ENOTTY; ++ char *path, *vfio_dev_path = NULL, *vfio_path = NULL; ++ DIR *dir = NULL; ++ struct dirent *dent; ++ gchar *contents; ++ struct stat st; ++ gsize length; ++ int major, minor; ++ dev_t vfio_devt; ++ ++ path = g_strdup_printf("%s/vfio-dev", sysfs_path); ++ if (stat(path, &st) < 0) { ++ error_setg_errno(errp, errno, "no such host device"); ++ goto out_free_path; ++ } ++ ++ dir = opendir(path); ++ if (!dir) { ++ error_setg_errno(errp, errno, "couldn't open directory %s", path); ++ goto out_free_path; ++ } ++ ++ while ((dent = readdir(dir))) { ++ if (!strncmp(dent->d_name, "vfio", 4)) { ++ vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name); ++ break; ++ } ++ } ++ ++ if (!vfio_dev_path) { ++ error_setg(errp, "failed to find vfio-dev/vfioX/dev"); ++ goto out_close_dir; ++ } ++ ++ if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) { ++ error_setg(errp, "failed to load \"%s\"", vfio_dev_path); ++ goto out_free_dev_path; ++ } ++ ++ if 
(sscanf(contents, "%d:%d", &major, &minor) != 2) { ++ error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path); ++ goto out_free_dev_path; ++ } ++ g_free(contents); ++ vfio_devt = makedev(major, minor); ++ ++ vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name); ++ ret = open_cdev(vfio_path, vfio_devt); ++ if (ret < 0) { ++ error_setg(errp, "Failed to open %s", vfio_path); ++ } ++ ++ trace_iommufd_cdev_getfd(vfio_path, ret); ++ g_free(vfio_path); ++ ++out_free_dev_path: ++ g_free(vfio_dev_path); ++out_close_dir: ++ closedir(dir); ++out_free_path: ++ if (*errp) { ++ error_prepend(errp, VFIO_MSG_PREFIX, path); ++ } ++ g_free(path); ++ ++ return ret; ++} ++ ++static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, ++ Error **errp) ++{ ++ int ret, iommufd = vbasedev->iommufd->fd; ++ struct vfio_device_attach_iommufd_pt attach_data = { ++ .argsz = sizeof(attach_data), ++ .flags = 0, ++ .pt_id = id, ++ }; ++ ++ /* Attach device to an IOAS or hwpt within iommufd */ ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data); ++ if (ret) { ++ error_setg_errno(errp, errno, ++ "[iommufd=%d] error attach %s (%d) to id=%d", ++ iommufd, vbasedev->name, vbasedev->fd, id); ++ } else { ++ trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, ++ vbasedev->fd, id); ++ } ++ return ret; ++} ++ ++static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) ++{ ++ int ret, iommufd = vbasedev->iommufd->fd; ++ struct vfio_device_detach_iommufd_pt detach_data = { ++ .argsz = sizeof(detach_data), ++ .flags = 0, ++ }; ++ ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data); ++ if (ret) { ++ error_setg_errno(errp, errno, "detach %s failed", vbasedev->name); ++ } else { ++ trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name); ++ } ++ return ret; ++} ++ ++static int iommufd_cdev_attach_container(VFIODevice *vbasedev, ++ VFIOIOMMUFDContainer *container, ++ Error **errp) ++{ ++ return 
iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); ++} ++ ++static void iommufd_cdev_detach_container(VFIODevice *vbasedev, ++ VFIOIOMMUFDContainer *container) ++{ ++ Error *err = NULL; ++ ++ if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { ++ error_report_err(err); ++ } ++} ++ ++static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) ++{ ++ VFIOContainerBase *bcontainer = &container->bcontainer; ++ ++ if (!QLIST_EMPTY(&bcontainer->device_list)) { ++ return; ++ } ++ memory_listener_unregister(&bcontainer->listener); ++ vfio_container_destroy(bcontainer); ++ iommufd_backend_free_id(container->be, container->ioas_id); ++ g_free(container); ++} ++ ++static int iommufd_cdev_ram_block_discard_disable(bool state) ++{ ++ /* ++ * We support coordinated discarding of RAM via the RamDiscardManager. ++ */ ++ return ram_block_uncoordinated_discard_disable(state); ++} ++ ++static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) ++{ ++ VFIOContainerBase *bcontainer; ++ VFIOIOMMUFDContainer *container; ++ VFIOAddressSpace *space; ++ struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; ++ int ret, devfd; ++ uint32_t ioas_id; ++ Error *err = NULL; ++ ++ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); ++ if (devfd < 0) { ++ return devfd; ++ } ++ vbasedev->fd = devfd; ++ ++ ret = iommufd_cdev_connect_and_bind(vbasedev, errp); ++ if (ret) { ++ goto err_connect_bind; ++ } ++ ++ space = vfio_get_address_space(as); ++ ++ /* try to attach to an existing container in this space */ ++ QLIST_FOREACH(bcontainer, &space->containers, next) { ++ container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ if (bcontainer->ops != &vfio_iommufd_ops || ++ vbasedev->iommufd != container->be) { ++ continue; ++ } ++ if (iommufd_cdev_attach_container(vbasedev, container, &err)) { ++ const char *msg = error_get_pretty(err); ++ ++ 
trace_iommufd_cdev_fail_attach_existing_container(msg); ++ error_free(err); ++ err = NULL; ++ } else { ++ ret = iommufd_cdev_ram_block_discard_disable(true); ++ if (ret) { ++ error_setg(errp, ++ "Cannot set discarding of RAM broken (%d)", ret); ++ goto err_discard_disable; ++ } ++ goto found_container; ++ } ++ } ++ ++ /* Need to allocate a new dedicated container */ ++ ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp); ++ if (ret < 0) { ++ goto err_alloc_ioas; ++ } ++ ++ trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); ++ ++ container = g_malloc0(sizeof(*container)); ++ container->be = vbasedev->iommufd; ++ container->ioas_id = ioas_id; ++ ++ bcontainer = &container->bcontainer; ++ vfio_container_init(bcontainer, space, &vfio_iommufd_ops); ++ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); ++ ++ ret = iommufd_cdev_attach_container(vbasedev, container, errp); ++ if (ret) { ++ goto err_attach_container; ++ } ++ ++ ret = iommufd_cdev_ram_block_discard_disable(true); ++ if (ret) { ++ goto err_discard_disable; ++ } ++ ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ ++ bcontainer->listener = vfio_memory_listener; ++ memory_listener_register(&bcontainer->listener, bcontainer->space->as); ++ ++ if (bcontainer->error) { ++ ret = -1; ++ error_propagate_prepend(errp, bcontainer->error, ++ "memory listener initialization failed: "); ++ goto err_listener_register; ++ } ++ ++ bcontainer->initialized = true; ++ ++found_container: ++ ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info); ++ if (ret) { ++ error_setg_errno(errp, errno, "error getting device info"); ++ goto err_listener_register; ++ } ++ ++ /* ++ * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level ++ * for discarding incompatibility check as well? 
++ */ ++ if (vbasedev->ram_block_discard_allowed) { ++ iommufd_cdev_ram_block_discard_disable(false); ++ } ++ ++ vbasedev->group = 0; ++ vbasedev->num_irqs = dev_info.num_irqs; ++ vbasedev->num_regions = dev_info.num_regions; ++ vbasedev->flags = dev_info.flags; ++ vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); ++ vbasedev->bcontainer = bcontainer; ++ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); ++ QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); ++ ++ trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, ++ vbasedev->num_regions, vbasedev->flags); ++ return 0; ++ ++err_listener_register: ++ iommufd_cdev_ram_block_discard_disable(false); ++err_discard_disable: ++ iommufd_cdev_detach_container(vbasedev, container); ++err_attach_container: ++ iommufd_cdev_container_destroy(container); ++err_alloc_ioas: ++ vfio_put_address_space(space); ++ iommufd_cdev_unbind_and_disconnect(vbasedev); ++err_connect_bind: ++ close(vbasedev->fd); ++ return ret; ++} ++ ++static void iommufd_cdev_detach(VFIODevice *vbasedev) ++{ ++ VFIOContainerBase *bcontainer = vbasedev->bcontainer; ++ VFIOAddressSpace *space = bcontainer->space; ++ VFIOIOMMUFDContainer *container = container_of(bcontainer, ++ VFIOIOMMUFDContainer, ++ bcontainer); ++ QLIST_REMOVE(vbasedev, global_next); ++ QLIST_REMOVE(vbasedev, container_next); ++ vbasedev->bcontainer = NULL; ++ ++ if (!vbasedev->ram_block_discard_allowed) { ++ iommufd_cdev_ram_block_discard_disable(false); ++ } ++ ++ iommufd_cdev_detach_container(vbasedev, container); ++ iommufd_cdev_container_destroy(container); ++ vfio_put_address_space(space); ++ ++ iommufd_cdev_unbind_and_disconnect(vbasedev); ++ close(vbasedev->fd); ++} ++ ++const VFIOIOMMUOps vfio_iommufd_ops = { ++ .dma_map = iommufd_cdev_map, ++ .dma_unmap = iommufd_cdev_unmap, ++ .attach_device = iommufd_cdev_attach, ++ .detach_device = iommufd_cdev_detach, ++}; +diff --git a/hw/vfio/meson.build 
b/hw/vfio/meson.build +index eb6ce6229d..e5d98b6adc 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -7,6 +7,9 @@ vfio_ss.add(files( + 'spapr.c', + 'migration.c', + )) ++vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( ++ 'iommufd.c', ++)) + vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( + 'display.c', + 'pci-quirks.c', +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 08a1f9dfa4..3340c93af0 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -164,3 +164,13 @@ vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcop + vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" + vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" ++ ++#iommufd.c ++ ++iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d" ++iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)" ++iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d" ++iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s" ++iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" ++iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" ++iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" +diff 
--git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 24ecc0e7ee..3dac5c167e 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow { + QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; + } VFIOHostDMAWindow; + ++typedef struct IOMMUFDBackend IOMMUFDBackend; ++ ++typedef struct VFIOIOMMUFDContainer { ++ VFIOContainerBase bcontainer; ++ IOMMUFDBackend *be; ++ uint32_t ioas_id; ++} VFIOIOMMUFDContainer; ++ + typedef struct VFIODeviceOps VFIODeviceOps; + + typedef struct VFIODevice { +@@ -116,6 +124,8 @@ typedef struct VFIODevice { + OnOffAuto pre_copy_dirty_page_tracking; + bool dirty_pages_supported; + bool dirty_tracking; ++ int devid; ++ IOMMUFDBackend *iommufd; + } VFIODevice; + + struct VFIODeviceOps { +@@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; + extern const VFIOIOMMUOps vfio_legacy_ops; ++extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch b/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch new file mode 100644 index 0000000..866a437 --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch @@ -0,0 +1,155 @@ +From f98defd6fe081bc44f5bd823d187d7d3b12832ac Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:23 +0100 +Subject: [PATCH 056/101] vfio/iommufd: Introduce a VFIOIOMMU iommufd QOM + interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [55/67] 
789ecf74ace326b0df5d494fd558d7d0b6294a85 (eauger1/centos-qemu-kvm) + +As previously done for the sPAPR and legacy IOMMU backends, convert +the VFIOIOMMUOps struct to a QOM interface. The set of of operations +for this backend can be referenced with a literal typename instead of +a C struct. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit ce5f6d49f5845c3b9955cc377a5223c3f8d7ba1e) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 2 +- + hw/vfio/iommufd.c | 35 ++++++++++++++++++++------- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 2 +- + 4 files changed, 28 insertions(+), 12 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 2329d0efc8..89ff1c7aed 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1508,7 +1508,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { +- ops = &vfio_iommufd_ops; ++ ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + } + #endif + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 87a561c545..d4c586e842 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -319,6 +319,8 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + int ret, devfd; + uint32_t ioas_id; + Error *err = NULL; ++ const VFIOIOMMUClass *iommufd_vioc = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + + if (vbasedev->fd < 0) { + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); +@@ -340,7 +342,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + /* try to attach to an existing container in this space */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); +- if (bcontainer->ops != &vfio_iommufd_ops || ++ if (bcontainer->ops != iommufd_vioc || + vbasedev->iommufd != container->be) { + continue; + } +@@ 
-374,7 +376,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + container->ioas_id = ioas_id; + + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, space, &vfio_iommufd_ops); ++ vfio_container_init(bcontainer, space, iommufd_vioc); + QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + + ret = iommufd_cdev_attach_container(vbasedev, container, errp); +@@ -476,9 +478,11 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) + static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) + { + VFIODevice *vbasedev_iter; ++ const VFIOIOMMUClass *iommufd_vioc = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + + QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { +- if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { ++ if (vbasedev_iter->bcontainer->ops != iommufd_vioc) { + continue; + } + if (devid == vbasedev_iter->devid) { +@@ -621,10 +625,23 @@ out_single: + return ret; + } + +-const VFIOIOMMUOps vfio_iommufd_ops = { +- .dma_map = iommufd_cdev_map, +- .dma_unmap = iommufd_cdev_unmap, +- .attach_device = iommufd_cdev_attach, +- .detach_device = iommufd_cdev_detach, +- .pci_hot_reset = iommufd_cdev_pci_hot_reset, ++static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->dma_map = iommufd_cdev_map; ++ vioc->dma_unmap = iommufd_cdev_unmap; ++ vioc->attach_device = iommufd_cdev_attach; ++ vioc->detach_device = iommufd_cdev_detach; ++ vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; + }; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_IOMMUFD, ++ .parent = TYPE_VFIO_IOMMU, ++ .class_init = vfio_iommu_iommufd_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 14c497b6b0..9b7ef7d02b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -210,7 +210,6 @@ typedef 
QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +-extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 9e21d7811f..b2813b0c11 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -17,7 +17,6 @@ + + typedef struct VFIODevice VFIODevice; + typedef struct VFIOIOMMUClass VFIOIOMMUClass; +-#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ + + typedef struct { + unsigned long *bitmap; +@@ -96,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + #define TYPE_VFIO_IOMMU "vfio-iommu" + #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" + #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" ++#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch b/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch new file mode 100644 index 0000000..f77032b --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch @@ -0,0 +1,71 @@ +From 5a49c5bb690d55fc88b6fb12f059ae932de0a716 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:04 +0800 +Subject: [PATCH 025/101] vfio/iommufd: Relax assert check for iommufd backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [24/67] 2c9e41e9ca0b67ebf807d1643a98866a0cb75768 
(eauger1/centos-qemu-kvm) + +Currently iommufd doesn't support dirty page sync yet, +but it will not block us doing live migration if VFIO +migration is force enabled. + +So in this case we allow set_dirty_page_tracking to be NULL. +Note we don't need same change for query_dirty_bitmap because +when dirty page sync isn't supported, query_dirty_bitmap will +never be called. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 36e84d0c17102fa1c887d8c650a13ec08fca0ec0) +Signed-off-by: Eric Auger +--- + hw/vfio/container-base.c | 4 ++++ + hw/vfio/container.c | 4 ---- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 71f7274973..eee2dcfe76 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -55,6 +55,10 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start) + { ++ if (!bcontainer->dirty_pages_supported) { ++ return 0; ++ } ++ + g_assert(bcontainer->ops->set_dirty_page_tracking); + return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 6bacf38222..ed2d721b2b 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -216,10 +216,6 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + .argsz = sizeof(dirty), + }; + +- if (!bcontainer->dirty_pages_supported) { +- return 0; +- } +- + if (start) { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; + } else { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch b/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch new file mode 100644 index 0000000..97d30c9 --- /dev/null +++ 
b/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch @@ -0,0 +1,55 @@ +From 5549bf1b2e07213c23e280a43ab2ab67d5b7304a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:25 +0100 +Subject: [PATCH 058/101] vfio/iommufd: Remove CONFIG_IOMMUFD usage +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [57/67] 3a6a45d379241d9412e0b8bcfeb9be0b4add59a5 (eauger1/centos-qemu-kvm) + +Availability of the IOMMUFD backend can now be fully determined at +runtime and the ifdef check was a build time protection (for PPC not +supporting it mostly). + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit c1139fa4feba8c320e4bd0a4e34af55caa5ffbb9) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 89ff1c7aed..0d4d8b8416 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -19,7 +19,6 @@ + */ + + #include "qemu/osdep.h" +-#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #ifdef CONFIG_KVM + #include +@@ -1506,11 +1505,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + const VFIOIOMMUClass *ops = + VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + +-#ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { + ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + } +-#endif + + assert(ops); + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch b/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch new file mode 100644 index 0000000..7401d52 --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch @@ -0,0 +1,56 @@ 
+From 6b36dc2a305af856af03aad2e315eea96a349153 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Thu, 21 Dec 2023 09:09:57 +0100 +Subject: [PATCH 061/101] vfio/iommufd: Remove the use of stat() to check file + existence +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [60/67] 485770e45c1a6399780939bfb8b01b615d9213c6 (eauger1/centos-qemu-kvm) + +Using stat() before opening a file or a directory can lead to a +time-of-check to time-of-use (TOCTOU) filesystem race, which is +reported by coverity as a Security best practices violations. The +sequence could be replaced by open and fdopendir but it doesn't add +much in this case. Simply use opendir to avoid the race. + +Fixes: CID 1531551 +Signed-off-by: Cédric Le Goater +Reviewed-by: Zhenzhong Duan +(cherry picked from commit 6ba254801f6bc7f3ef68a6414f1b107237c7eb26) +Signed-off-by: Eric Auger +--- + hw/vfio/iommufd.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index d4c586e842..9bfddc1360 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -121,17 +121,11 @@ static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) + DIR *dir = NULL; + struct dirent *dent; + gchar *contents; +- struct stat st; + gsize length; + int major, minor; + dev_t vfio_devt; + + path = g_strdup_printf("%s/vfio-dev", sysfs_path); +- if (stat(path, &st) < 0) { +- error_setg_errno(errp, errno, "no such host device"); +- goto out_free_path; +- } +- + dir = opendir(path); + if (!dir) { + error_setg_errno(errp, errno, "couldn't open directory %s", path); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch b/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch deleted file mode 
100644 index b8e72e6..0000000 --- a/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch +++ /dev/null @@ -1,438 +0,0 @@ -From 080d28c191b7d951f1f4596dcaa13d590c07d886 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 15/37] vfio/migration: Add VFIO migration pre-copy support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/28] 7b2ea1471440d47e5aed1211c96942ca7bface96 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit eda7362af959 -Author: Avihai Horon -Date: Wed Jun 21 14:12:00 2023 +0300 - - vfio/migration: Add VFIO migration pre-copy support - - Pre-copy support allows the VFIO device data to be transferred while the - VM is running. This helps to accommodate VFIO devices that have a large - amount of data that needs to be transferred, and it can reduce migration - downtime. - - Pre-copy support is optional in VFIO migration protocol v2. - Implement pre-copy of VFIO migration protocol v2 and use it for devices - that support it. Full description of it can be found in the following - Linux commit: 4db52602a607 ("vfio: Extend the device migration protocol - with PRE_COPY"). 
- - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - docs/devel/vfio-migration.rst | 35 +++++--- - hw/vfio/common.c | 6 +- - hw/vfio/migration.c | 165 ++++++++++++++++++++++++++++++++-- - hw/vfio/trace-events | 4 +- - include/hw/vfio/vfio-common.h | 2 + - 5 files changed, 190 insertions(+), 22 deletions(-) - -diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst -index 1b68ccf115..e896b2a673 100644 ---- a/docs/devel/vfio-migration.rst -+++ b/docs/devel/vfio-migration.rst -@@ -7,12 +7,14 @@ the guest is running on source host and restoring this saved state on the - destination host. This document details how saving and restoring of VFIO - devices is done in QEMU. - --Migration of VFIO devices currently consists of a single stop-and-copy phase. --During the stop-and-copy phase the guest is stopped and the entire VFIO device --data is transferred to the destination. -- --The pre-copy phase of migration is currently not supported for VFIO devices. --Support for VFIO pre-copy will be added later on. -+Migration of VFIO devices consists of two phases: the optional pre-copy phase, -+and the stop-and-copy phase. The pre-copy phase is iterative and allows to -+accommodate VFIO devices that have a large amount of data that needs to be -+transferred. The iterative pre-copy phase of migration allows for the guest to -+continue whilst the VFIO device state is transferred to the destination, this -+helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy -+support by reporting the VFIO_MIGRATION_PRE_COPY flag in the -+VFIO_DEVICE_FEATURE_MIGRATION ioctl. - - Note that currently VFIO migration is supported only for a single device. This - is due to VFIO migration's lack of P2P support. 
However, P2P support is planned -@@ -29,10 +31,20 @@ VFIO implements the device hooks for the iterative approach as follows: - * A ``load_setup`` function that sets the VFIO device on the destination in - _RESUMING state. - -+* A ``state_pending_estimate`` function that reports an estimate of the -+ remaining pre-copy data that the vendor driver has yet to save for the VFIO -+ device. -+ - * A ``state_pending_exact`` function that reads pending_bytes from the vendor - driver, which indicates the amount of data that the vendor driver has yet to - save for the VFIO device. - -+* An ``is_active_iterate`` function that indicates ``save_live_iterate`` is -+ active only when the VFIO device is in pre-copy states. -+ -+* A ``save_live_iterate`` function that reads the VFIO device's data from the -+ vendor driver during iterative pre-copy phase. -+ - * A ``save_state`` function to save the device config space if it is present. - - * A ``save_live_complete_precopy`` function that sets the VFIO device in -@@ -111,8 +123,10 @@ Flow of state changes during Live migration - =========================================== - - Below is the flow of state change during live migration. --The values in the brackets represent the VM state, the migration state, and -+The values in the parentheses represent the VM state, the migration state, and - the VFIO device state, respectively. -+The text in the square brackets represents the flow if the VFIO device supports -+pre-copy. 
- - Live migration save path - ------------------------ -@@ -124,11 +138,12 @@ Live migration save path - | - migrate_init spawns migration_thread - Migration thread then calls each device's .save_setup() -- (RUNNING, _SETUP, _RUNNING) -+ (RUNNING, _SETUP, _RUNNING [_PRE_COPY]) - | -- (RUNNING, _ACTIVE, _RUNNING) -- If device is active, get pending_bytes by .state_pending_exact() -+ (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY]) -+ If device is active, get pending_bytes by .state_pending_{estimate,exact}() - If total pending_bytes >= threshold_size, call .save_live_iterate() -+ [Data of VFIO device for pre-copy phase is copied] - Iterate till total pending bytes converge and are less than threshold - | - On migration completion, vCPU stops and calls .save_live_complete_precopy for -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 78358ede27..b73086e17a 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -492,7 +492,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - } - - if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && -- migration->device_state == VFIO_DEVICE_STATE_RUNNING) { -+ (migration->device_state == VFIO_DEVICE_STATE_RUNNING || -+ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) { - return false; - } - } -@@ -537,7 +538,8 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) - return false; - } - -- if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) { -+ if (migration->device_state == VFIO_DEVICE_STATE_RUNNING || -+ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { - continue; - } else { - return false; -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 8d33414379..d8f6a22ae1 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -68,6 +68,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state) - return "STOP_COPY"; - case VFIO_DEVICE_STATE_RESUMING: - return "RESUMING"; -+ case VFIO_DEVICE_STATE_PRE_COPY: -+ return 
"PRE_COPY"; - default: - return "UNKNOWN STATE"; - } -@@ -241,6 +243,25 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, - return 0; - } - -+static int vfio_query_precopy_size(VFIOMigration *migration) -+{ -+ struct vfio_precopy_info precopy = { -+ .argsz = sizeof(precopy), -+ }; -+ -+ migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; -+ -+ if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) { -+ return -errno; -+ } -+ -+ migration->precopy_init_size = precopy.initial_bytes; -+ migration->precopy_dirty_size = precopy.dirty_bytes; -+ -+ return 0; -+} -+ - /* Returns the size of saved data on success and -errno on error */ - static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - { -@@ -249,6 +270,14 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - data_size = read(migration->data_fd, migration->data_buffer, - migration->data_buffer_size); - if (data_size < 0) { -+ /* -+ * Pre-copy emptied all the device state for now. For more information, -+ * please refer to the Linux kernel VFIO uAPI. -+ */ -+ if (errno == ENOMSG) { -+ return 0; -+ } -+ - return -errno; - } - if (data_size == 0) { -@@ -265,6 +294,38 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - return qemu_file_get_error(f) ?: data_size; - } - -+static void vfio_update_estimated_pending_data(VFIOMigration *migration, -+ uint64_t data_size) -+{ -+ if (!data_size) { -+ /* -+ * Pre-copy emptied all the device state for now, update estimated sizes -+ * accordingly. 
-+ */ -+ migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; -+ -+ return; -+ } -+ -+ if (migration->precopy_init_size) { -+ uint64_t init_size = MIN(migration->precopy_init_size, data_size); -+ -+ migration->precopy_init_size -= init_size; -+ data_size -= init_size; -+ } -+ -+ migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size, -+ data_size); -+} -+ -+static bool vfio_precopy_supported(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ -+ return migration->mig_flags & VFIO_MIGRATION_PRE_COPY; -+} -+ - /* ---------------------------------------------------------------------- */ - - static int vfio_save_setup(QEMUFile *f, void *opaque) -@@ -285,6 +346,28 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) - return -ENOMEM; - } - -+ if (vfio_precopy_supported(vbasedev)) { -+ int ret; -+ -+ switch (migration->device_state) { -+ case VFIO_DEVICE_STATE_RUNNING: -+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY, -+ VFIO_DEVICE_STATE_RUNNING); -+ if (ret) { -+ return ret; -+ } -+ -+ vfio_query_precopy_size(migration); -+ -+ break; -+ case VFIO_DEVICE_STATE_STOP: -+ /* vfio_save_complete_precopy() will go to STOP_COPY */ -+ break; -+ default: -+ return -EINVAL; -+ } -+ } -+ - trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -@@ -299,26 +382,42 @@ static void vfio_save_cleanup(void *opaque) - - g_free(migration->data_buffer); - migration->data_buffer = NULL; -+ migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; - vfio_migration_cleanup(vbasedev); - trace_vfio_save_cleanup(vbasedev->name); - } - -+static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy, -+ uint64_t *can_postcopy) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ -+ if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) { -+ return; -+ } -+ -+ *must_precopy += -+ 
migration->precopy_init_size + migration->precopy_dirty_size; -+ -+ trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy, -+ *can_postcopy, -+ migration->precopy_init_size, -+ migration->precopy_dirty_size); -+} -+ - /* - * Migration size of VFIO devices can be as little as a few KBs or as big as - * many GBs. This value should be big enough to cover the worst case. - */ - #define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) - --/* -- * Only exact function is implemented and not estimate function. The reason is -- * that during pre-copy phase of migration the estimate function is called -- * repeatedly while pending RAM size is over the threshold, thus migration -- * can't converge and querying the VFIO device pending data size is useless. -- */ - static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - uint64_t *can_postcopy) - { - VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; - uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; - - /* -@@ -328,8 +427,48 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - vfio_query_stop_copy_size(vbasedev, &stop_copy_size); - *must_precopy += stop_copy_size; - -+ if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { -+ vfio_query_precopy_size(migration); -+ -+ *must_precopy += -+ migration->precopy_init_size + migration->precopy_dirty_size; -+ } -+ - trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, -- stop_copy_size); -+ stop_copy_size, migration->precopy_init_size, -+ migration->precopy_dirty_size); -+} -+ -+static bool vfio_is_active_iterate(void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ -+ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; -+} -+ -+static int vfio_save_iterate(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ ssize_t data_size; -+ -+ data_size = 
vfio_save_block(f, migration); -+ if (data_size < 0) { -+ return data_size; -+ } -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ -+ vfio_update_estimated_pending_data(migration, data_size); -+ -+ trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, -+ migration->precopy_dirty_size); -+ -+ /* -+ * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero. -+ * Return 1 so following handlers will not be potentially blocked. -+ */ -+ return 1; - } - - static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) -@@ -338,7 +477,7 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - ssize_t data_size; - int ret; - -- /* We reach here with device state STOP only */ -+ /* We reach here with device state STOP or STOP_COPY only */ - ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, - VFIO_DEVICE_STATE_STOP); - if (ret) { -@@ -457,7 +596,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - static const SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, -+ .state_pending_estimate = vfio_state_pending_estimate, - .state_pending_exact = vfio_state_pending_exact, -+ .is_active_iterate = vfio_is_active_iterate, -+ .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_save_complete_precopy, - .save_state = vfio_save_state, - .load_setup = vfio_load_setup, -@@ -470,13 +612,18 @@ static const SaveVMHandlers savevm_vfio_handlers = { - static void vfio_vmstate_change(void *opaque, bool running, RunState state) - { - VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; - enum vfio_device_mig_state new_state; - int ret; - - if (running) { - new_state = VFIO_DEVICE_STATE_RUNNING; - } else { -- new_state = VFIO_DEVICE_STATE_STOP; -+ new_state = -+ (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY && -+ (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ? 
-+ VFIO_DEVICE_STATE_STOP_COPY : -+ VFIO_DEVICE_STATE_STOP; - } - - /* -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 646e42fd27..4150b59e58 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -162,6 +162,8 @@ vfio_save_block(const char *name, int data_size) " (%s) data_size %d" - vfio_save_cleanup(const char *name) " (%s)" - vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" - vfio_save_device_config_state(const char *name) " (%s)" -+vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 - vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 --vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 -+vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 -+vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 - vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 5f29dab839..1db901c194 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -67,6 +67,8 @@ typedef struct VFIOMigration { - void *data_buffer; - size_t data_buffer_size; - uint64_t mig_flags; -+ uint64_t precopy_init_size; -+ uint64_t precopy_dirty_size; - } 
VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch b/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch new file mode 100644 index 0000000..6556a19 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch @@ -0,0 +1,115 @@ +From 0c0435e7210b99a6bf7b8f8205f7af8277b7525b Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 12:48:18 +0200 +Subject: [PATCH 063/101] vfio/migration: Add helper function to set state or + reset device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [62/67] 1a63eea289561a05a6a8527c2a9da0289a7836d9 (eauger1/centos-qemu-kvm) + +There are several places where failure in setting the device state leads +to a device reset, which is done by setting ERROR as the recover state. + +Add a helper function that sets the device state and resets the device +in case of failure. This will make the code cleaner and remove duplicate +comments. + +Signed-off-by: Avihai Horon +Reviewed-by: Cédric Le Goater +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit c817e5a377a334241eed149e35760aca58bdeb34) +Signed-off-by: Eric Auger +--- + hw/vfio/migration.c | 41 +++++++++++++++++------------------------ + 1 file changed, 17 insertions(+), 24 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 28d422b39f..70e6b1a709 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -163,6 +163,19 @@ reset_device: + return ret; + } + ++/* ++ * Some device state transitions require resetting the device if they fail. ++ * This function sets the device in new_state and resets the device if that ++ * fails. 
Reset is done by using ERROR as the recover state. ++ */ ++static int ++vfio_migration_set_state_or_reset(VFIODevice *vbasedev, ++ enum vfio_device_mig_state new_state) ++{ ++ return vfio_migration_set_state(vbasedev, new_state, ++ VFIO_DEVICE_STATE_ERROR); ++} ++ + static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, + uint64_t data_size) + { +@@ -422,12 +435,7 @@ static void vfio_save_cleanup(void *opaque) + * after migration has completed, so it won't increase downtime. + */ + if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) { +- /* +- * If setting the device in STOP state fails, the device should be +- * reset. To do so, use ERROR state as a recover state. +- */ +- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, +- VFIO_DEVICE_STATE_ERROR); ++ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP); + } + + g_free(migration->data_buffer); +@@ -699,12 +707,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running, + VFIO_DEVICE_STATE_PRE_COPY_P2P : + VFIO_DEVICE_STATE_RUNNING_P2P; + +- /* +- * If setting the device in new_state fails, the device should be reset. +- * To do so, use ERROR state as a recover state. +- */ +- ret = vfio_migration_set_state(vbasedev, new_state, +- VFIO_DEVICE_STATE_ERROR); ++ ret = vfio_migration_set_state_or_reset(vbasedev, new_state); + if (ret) { + /* + * Migration should be aborted in this case, but vm_state_notify() +@@ -736,12 +739,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) + VFIO_DEVICE_STATE_STOP; + } + +- /* +- * If setting the device in new_state fails, the device should be reset. +- * To do so, use ERROR state as a recover state. 
+- */ +- ret = vfio_migration_set_state(vbasedev, new_state, +- VFIO_DEVICE_STATE_ERROR); ++ ret = vfio_migration_set_state_or_reset(vbasedev, new_state); + if (ret) { + /* + * Migration should be aborted in this case, but vm_state_notify() +@@ -770,12 +768,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) + case MIGRATION_STATUS_CANCELLING: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_FAILED: +- /* +- * If setting the device in RUNNING state fails, the device should +- * be reset. To do so, use ERROR state as a recover state. +- */ +- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, +- VFIO_DEVICE_STATE_ERROR); ++ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING); + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch b/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch deleted file mode 100644 index d87680d..0000000 --- a/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 169dc1bb051b3aebc571936d956b49ba0621ae43 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 16/37] vfio/migration: Add support for switchover ack - capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [14/28] b3bd2eb2d0ca49ff05a0a82ae5bb956a354aed47 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 745c42912a04 -Author: Avihai Horon -Date: Wed Jun 21 14:12:01 2023 +0300 - - vfio/migration: Add support for switchover ack capability - - Loading of a VFIO device's data can take a substantial amount of time as - the device may need to allocate resources, prepare 
internal data - structures, etc. This can increase migration downtime, especially for - VFIO devices with a lot of resources. - - To solve this, VFIO migration uAPI defines "initial bytes" as part of - its precopy data stream. Initial bytes can be used in various ways to - improve VFIO migration performance. For example, it can be used to - transfer device metadata to pre-allocate resources in the destination. - However, for this to work we need to make sure that all initial bytes - are sent and loaded in the destination before the source VM is stopped. - - Use migration switchover ack capability to make sure a VFIO device's - initial bytes are sent and loaded in the destination before the source - stops the VM and attempts to complete the migration. - This can significantly reduce migration downtime for some devices. - - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - docs/devel/vfio-migration.rst | 10 +++++++++ - hw/vfio/migration.c | 39 ++++++++++++++++++++++++++++++++++- - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 49 insertions(+), 1 deletion(-) - -diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst -index e896b2a673..b433cb5bb2 100644 ---- a/docs/devel/vfio-migration.rst -+++ b/docs/devel/vfio-migration.rst -@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy - support by reporting the VFIO_MIGRATION_PRE_COPY flag in the - VFIO_DEVICE_FEATURE_MIGRATION ioctl. - -+When pre-copy is supported, it's possible to further reduce downtime by -+enabling "switchover-ack" migration capability. -+VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream -+and recommends that the initial bytes are sent and loaded in the destination -+before stopping the source VM. 
Enabling this migration capability will -+guarantee that and thus, can potentially reduce downtime even further. -+ - Note that currently VFIO migration is supported only for a single device. This - is due to VFIO migration's lack of P2P support. However, P2P support is planned - to be added later on. -@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach as follows: - * A ``save_live_iterate`` function that reads the VFIO device's data from the - vendor driver during iterative pre-copy phase. - -+* A ``switchover_ack_needed`` function that checks if the VFIO device uses -+ "switchover-ack" migration capability when this capability is enabled. -+ - * A ``save_state`` function to save the device config space if it is present. - - * A ``save_live_complete_precopy`` function that sets the VFIO device in -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index d8f6a22ae1..acbf0bb7ab 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -18,6 +18,8 @@ - #include "sysemu/runstate.h" - #include "hw/vfio/vfio-common.h" - #include "migration/migration.h" -+#include "migration/options.h" -+#include "migration/savevm.h" - #include "migration/vmstate.h" - #include "migration/qemu-file.h" - #include "migration/register.h" -@@ -45,6 +47,7 @@ - #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) - #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) - #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) -+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) - - /* - * This is an arbitrary size based on migration of mlx5 devices, where typically -@@ -384,6 +387,7 @@ static void vfio_save_cleanup(void *opaque) - migration->data_buffer = NULL; - migration->precopy_init_size = 0; - migration->precopy_dirty_size = 0; -+ migration->initial_data_sent = false; - vfio_migration_cleanup(vbasedev); - trace_vfio_save_cleanup(vbasedev->name); - } -@@ -457,10 +461,17 @@ static int vfio_save_iterate(QEMUFile *f, 
void *opaque) - if (data_size < 0) { - return data_size; - } -- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - vfio_update_estimated_pending_data(migration, data_size); - -+ if (migrate_switchover_ack() && !migration->precopy_init_size && -+ !migration->initial_data_sent) { -+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT); -+ migration->initial_data_sent = true; -+ } else { -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ } -+ - trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, - migration->precopy_dirty_size); - -@@ -579,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - } - break; - } -+ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT: -+ { -+ if (!vfio_precopy_supported(vbasedev) || -+ !migrate_switchover_ack()) { -+ error_report("%s: Received INIT_DATA_SENT but switchover ack " -+ "is not used", vbasedev->name); -+ return -EINVAL; -+ } -+ -+ ret = qemu_loadvm_approve_switchover(); -+ if (ret) { -+ error_report( -+ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)", -+ vbasedev->name, ret, strerror(-ret)); -+ } -+ -+ return ret; -+ } - default: - error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); - return -EINVAL; -@@ -593,6 +622,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - return ret; - } - -+static bool vfio_switchover_ack_needed(void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ -+ return vfio_precopy_supported(vbasedev); -+} -+ - static const SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, -@@ -605,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { - .load_setup = vfio_load_setup, - .load_cleanup = vfio_load_cleanup, - .load_state = vfio_load_state, -+ .switchover_ack_needed = vfio_switchover_ack_needed, - }; - - /* ---------------------------------------------------------------------- */ -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 
1db901c194..3dc5f2104c 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -69,6 +69,7 @@ typedef struct VFIOMigration { - uint64_t mig_flags; - uint64_t precopy_init_size; - uint64_t precopy_dirty_size; -+ bool initial_data_sent; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch b/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch deleted file mode 100644 index da43b4b..0000000 --- a/SOURCES/kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch +++ /dev/null @@ -1,90 +0,0 @@ -From df7814de08c8c7c45eacb7b9d9ead9be4d1e3baf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 11 Sep 2023 16:10:19 +0200 -Subject: [PATCH 4/4] vfio/migration: Block VFIO migration with postcopy - migration -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 318: VFIO migration: fix a QEMU crash when postcopy is enabled -RH-Bugzilla: 2229868 -RH-Acked-by: Alex Williamson -RH-Acked-by: Peter Xu -RH-Commit: [4/4] 36eedf879547044c2ba2763fb48784a95f9e4ea7 - -Bugzilla: https://bugzilla.redhat.com/2229868 - -commit bf7ef7a2da3e61dc104f26c679c9465e3fbe7dde -Author: Avihai Horon -Date: Wed Sep 6 18:08:52 2023 +0300 - - vfio/migration: Block VFIO migration with postcopy migration - - VFIO migration is not compatible with postcopy migration. A VFIO device - in the destination can't handle page faults for pages that have not been - sent yet. - - Doing such migration will cause the VM to crash in the destination: - - qemu-system-x86_64: VFIO_MAP_DMA failed: Bad address - qemu-system-x86_64: vfio_dma_map(0x55a28c7659d0, 0xc0000, 0xb000, 0x7f1b11a00000) = -14 (Bad address) - qemu: hardware error: vfio: DMA mapping failed, unable to continue - - To prevent this, block VFIO migration with postcopy migration. 
- - Reported-by: Yanghang Liu - Signed-off-by: Avihai Horon - Tested-by: Yanghang Liu - Reviewed-by: Peter Xu - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 22 ++++++++++++++++++++++ - 1 file changed, 22 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 2674f4bc47..4f018c7531 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -331,6 +331,27 @@ static bool vfio_precopy_supported(VFIODevice *vbasedev) - - /* ---------------------------------------------------------------------- */ - -+static int vfio_save_prepare(void *opaque, Error **errp) -+{ -+ VFIODevice *vbasedev = opaque; -+ -+ /* -+ * Snapshot doesn't use postcopy, so allow snapshot even if postcopy is on. -+ */ -+ if (runstate_check(RUN_STATE_SAVE_VM)) { -+ return 0; -+ } -+ -+ if (migrate_postcopy_ram()) { -+ error_setg( -+ errp, "%s: VFIO migration is not supported with postcopy migration", -+ vbasedev->name); -+ return -EOPNOTSUPP; -+ } -+ -+ return 0; -+} -+ - static int vfio_save_setup(QEMUFile *f, void *opaque) - { - VFIODevice *vbasedev = opaque; -@@ -630,6 +651,7 @@ static bool vfio_switchover_ack_needed(void *opaque) - } - - static const SaveVMHandlers savevm_vfio_handlers = { -+ .save_prepare = vfio_save_prepare, - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, - .state_pending_estimate = vfio_state_pending_estimate, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch b/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch deleted file mode 100644 index dde2e24..0000000 --- a/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 35c7d0d3b02d61d6f29afae74bd83edd70a6a1b4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 26/37] vfio/migration: Change vIOMMU blocker from global to - per device 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [24/28] 8fda1c82a81fadd4f38e6a5e878c9228a81c0f6e (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 3c26c80a0a26 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:07 2023 +0800 - - vfio/migration: Change vIOMMU blocker from global to per device - - Contrary to multiple device blocker which needs to consider already-attached - devices to unblock/block dynamically, the vIOMMU migration blocker is a device - specific config. Meaning it only needs to know whether the device is bypassing - or not the vIOMMU (via machine property, or per pxb-pcie::bypass_iommu), and - does not need the state of currently present devices. For this reason, the - vIOMMU global migration blocker can be consolidated into the per-device - migration blocker, allowing us to remove some unnecessary code. - - This change also makes vfio_mig_active() more accurate as it doesn't check for - global blocker. 
- - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 51 ++--------------------------------- - hw/vfio/migration.c | 7 ++--- - hw/vfio/pci.c | 1 - - include/hw/vfio/vfio-common.h | 3 +-- - 4 files changed, 7 insertions(+), 55 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 136d8243d6..e815f6ba30 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -362,7 +362,6 @@ bool vfio_mig_active(void) - } - - static Error *multiple_devices_migration_blocker; --static Error *giommu_migration_blocker; - - static unsigned int vfio_migratable_device_num(void) - { -@@ -420,55 +419,9 @@ void vfio_unblock_multiple_devices_migration(void) - multiple_devices_migration_blocker = NULL; - } - --static bool vfio_viommu_preset(void) -+bool vfio_viommu_preset(VFIODevice *vbasedev) - { -- VFIOAddressSpace *space; -- -- QLIST_FOREACH(space, &vfio_address_spaces, list) { -- if (space->as != &address_space_memory) { -- return true; -- } -- } -- -- return false; --} -- --int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) --{ -- int ret; -- -- if (giommu_migration_blocker || -- !vfio_viommu_preset()) { -- return 0; -- } -- -- if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -- error_setg(errp, -- "Migration is currently not supported with vIOMMU enabled"); -- return -EINVAL; -- } -- -- error_setg(&giommu_migration_blocker, -- "Migration is currently not supported with vIOMMU enabled"); -- ret = migrate_add_blocker(giommu_migration_blocker, errp); -- if (ret < 0) { -- error_free(giommu_migration_blocker); -- giommu_migration_blocker = NULL; -- } -- -- return ret; --} -- --void vfio_migration_finalize(void) --{ -- if (!giommu_migration_blocker || -- vfio_viommu_preset()) { -- return; -- } -- -- migrate_del_blocker(giommu_migration_blocker); -- error_free(giommu_migration_blocker); -- giommu_migration_blocker = NULL; 
-+ return vbasedev->group->container->space->as != &address_space_memory; - } - - static void vfio_set_migration_error(int err) -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 1db7d52ab2..e6e5e85f75 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -878,9 +878,10 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - return ret; - } - -- ret = vfio_block_giommu_migration(vbasedev, errp); -- if (ret) { -- return ret; -+ if (vfio_viommu_preset(vbasedev)) { -+ error_setg(&err, "%s: Migration is currently not supported " -+ "with vIOMMU enabled", vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); - } - - trace_vfio_migration_realize(vbasedev->name); -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2d059832a4..922c81872c 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3279,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj) - */ - vfio_put_device(vdev); - vfio_put_group(group); -- vfio_migration_finalize(); - } - - static void vfio_exitfn(PCIDevice *pdev) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 93429b9abb..45167c8a8a 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -227,7 +227,7 @@ extern VFIOGroupList vfio_group_list; - bool vfio_mig_active(void); - int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); - void vfio_unblock_multiple_devices_migration(void); --int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); -+bool vfio_viommu_preset(VFIODevice *vbasedev); - int64_t vfio_mig_bytes_transferred(void); - void vfio_reset_bytes_transferred(void); - -@@ -254,6 +254,5 @@ int vfio_spapr_remove_window(VFIOContainer *container, - - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); --void vfio_migration_finalize(void); - - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git 
a/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch b/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch deleted file mode 100644 index 9deaf1a..0000000 --- a/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch +++ /dev/null @@ -1,145 +0,0 @@ -From a36fa46369fe9bf2a2174e9ed6ab83042e904066 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 27/37] vfio/migration: Free resources when - vfio_migration_realize fails -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [25/28] b3ab8d3443d4bc12a689dc7d88a94da315814bb7 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 2b43b2995b02 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:08 2023 +0800 - - vfio/migration: Free resources when vfio_migration_realize fails - - When vfio_realize() succeeds, hot unplug will call vfio_exitfn() - to free resources allocated in vfio_realize(); when vfio_realize() - fails, vfio_exitfn() is never called and we need to free resources - in vfio_realize(). - - In the case that vfio_migration_realize() fails, - e.g: with -only-migratable & enable-migration=off, we see below: - - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off - 0000:81:11.1: Migration disabled - Error: disallowing migration blocker (--only-migratable) for: 0000:81:11.1: Migration is disabled for VFIO device - - If we hotplug again we should see same log as above, but we see: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off - Error: vfio 0000:81:11.1: device is already attached - - That's because some references to VFIO device isn't released. 
- For resources allocated in vfio_migration_realize(), free them by - jumping to out_deinit path with calling a new function - vfio_migration_deinit(). For resources allocated in vfio_realize(), - free them by jumping to de-register path in vfio_realize(). - - Signed-off-by: Zhenzhong Duan - Fixes: a22651053b59 ("vfio: Make vfio-pci device migration capable") - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 33 +++++++++++++++++++++++---------- - hw/vfio/pci.c | 1 + - 2 files changed, 24 insertions(+), 10 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index e6e5e85f75..e3954570c8 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev) - return 0; - } - -+static void vfio_migration_deinit(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ -+ remove_migration_state_change_notifier(&migration->migration_state); -+ qemu_del_vm_change_state_handler(migration->vm_state); -+ unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); -+ vfio_migration_free(vbasedev); -+ vfio_unblock_multiple_devices_migration(); -+} -+ - static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) - { - int ret; -@@ -866,7 +877,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - error_setg(&err, - "%s: VFIO device doesn't support device dirty tracking", - vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ goto add_blocker; - } - - warn_report("%s: VFIO device doesn't support device dirty tracking", -@@ -875,29 +886,31 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - - ret = vfio_block_multiple_devices_migration(vbasedev, errp); - if (ret) { -- return ret; -+ goto out_deinit; - } - - if (vfio_viommu_preset(vbasedev)) { - error_setg(&err, "%s: Migration is currently not 
supported " - "with vIOMMU enabled", vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ goto add_blocker; - } - - trace_vfio_migration_realize(vbasedev->name); - return 0; -+ -+add_blocker: -+ ret = vfio_block_migration(vbasedev, err, errp); -+out_deinit: -+ if (ret) { -+ vfio_migration_deinit(vbasedev); -+ } -+ return ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) - { - if (vbasedev->migration) { -- VFIOMigration *migration = vbasedev->migration; -- -- remove_migration_state_change_notifier(&migration->migration_state); -- qemu_del_vm_change_state_handler(migration->vm_state); -- unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); -- vfio_migration_free(vbasedev); -- vfio_unblock_multiple_devices_migration(); -+ vfio_migration_deinit(vbasedev); - } - - if (vbasedev->migration_blocker) { -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 922c81872c..037b7d4176 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3234,6 +3234,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - ret = vfio_migration_realize(vbasedev, errp); - if (ret) { - error_report("%s: Migration disabled", vbasedev->name); -+ goto out_deregister; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch b/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch deleted file mode 100644 index 3258541..0000000 --- a/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch +++ /dev/null @@ -1,283 +0,0 @@ -From 747c34c0a3b8048ebdab387d22f2b922c81d572a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 21/37] vfio/migration: Make VFIO migration non-experimental -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: 
Miroslav Rezanina -RH-Commit: [19/28] 2f457c1c0de95a3fced0270f2edbbc5193cc4de9 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8bbcb64a71d8 -Author: Avihai Horon -Date: Wed Jun 28 10:31:12 2023 +0300 - - vfio/migration: Make VFIO migration non-experimental - - The major parts of VFIO migration are supported today in QEMU. This - includes basic VFIO migration, device dirty page tracking and precopy - support. - - Thus, at this point in time, it seems appropriate to make VFIO migration - non-experimental: remove the x prefix from enable_migration property, - change it to ON_OFF_AUTO and let the default value be AUTO. - - In addition, make the following adjustments: - 1. When enable_migration is ON and migration is not supported, fail VFIO - device realization. - 2. When enable_migration is AUTO (i.e., not explicitly enabled), require - device dirty tracking support. This is because device dirty tracking - is currently the only method to do dirty page tracking, which is - essential for migrating in a reasonable downtime. Setting - enable_migration to ON will not require device dirty tracking. - 3. Make migration error and blocker messages more elaborate. - 4. Remove error prints in vfio_migration_query_flags(). - 5. Rename trace_vfio_migration_probe() to - trace_vfio_migration_realize(). 
- - Signed-off-by: Avihai Horon - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 16 ++++++- - hw/vfio/migration.c | 79 +++++++++++++++++++++++------------ - hw/vfio/pci.c | 4 +- - hw/vfio/trace-events | 2 +- - include/hw/vfio/vfio-common.h | 6 +-- - 5 files changed, 73 insertions(+), 34 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 3b4ac53f15..136d8243d6 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -381,7 +381,7 @@ static unsigned int vfio_migratable_device_num(void) - return device_num; - } - --int vfio_block_multiple_devices_migration(Error **errp) -+int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) - { - int ret; - -@@ -390,6 +390,12 @@ int vfio_block_multiple_devices_migration(Error **errp) - return 0; - } - -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_setg(errp, "Migration is currently not supported with multiple " -+ "VFIO devices"); -+ return -EINVAL; -+ } -+ - error_setg(&multiple_devices_migration_blocker, - "Migration is currently not supported with multiple " - "VFIO devices"); -@@ -427,7 +433,7 @@ static bool vfio_viommu_preset(void) - return false; - } - --int vfio_block_giommu_migration(Error **errp) -+int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) - { - int ret; - -@@ -436,6 +442,12 @@ int vfio_block_giommu_migration(Error **errp) - return 0; - } - -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_setg(errp, -+ "Migration is currently not supported with vIOMMU enabled"); -+ return -EINVAL; -+ } -+ - error_setg(&giommu_migration_blocker, - "Migration is currently not supported with vIOMMU enabled"); - ret = migrate_add_blocker(giommu_migration_blocker, errp); -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 7cf143926c..1db7d52ab2 100644 ---- a/hw/vfio/migration.c -+++ 
b/hw/vfio/migration.c -@@ -724,14 +724,6 @@ static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) - feature->argsz = sizeof(buf); - feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { -- if (errno == ENOTTY) { -- error_report("%s: VFIO migration is not supported in kernel", -- vbasedev->name); -- } else { -- error_report("%s: Failed to query VFIO migration support, err: %s", -- vbasedev->name, strerror(errno)); -- } -- - return -errno; - } - -@@ -810,6 +802,27 @@ static int vfio_migration_init(VFIODevice *vbasedev) - return 0; - } - -+static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) -+{ -+ int ret; -+ -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_propagate(errp, err); -+ return -EINVAL; -+ } -+ -+ vbasedev->migration_blocker = error_copy(err); -+ error_free(err); -+ -+ ret = migrate_add_blocker(vbasedev->migration_blocker, errp); -+ if (ret < 0) { -+ error_free(vbasedev->migration_blocker); -+ vbasedev->migration_blocker = NULL; -+ } -+ -+ return ret; -+} -+ - /* ---------------------------------------------------------------------- */ - - int64_t vfio_mig_bytes_transferred(void) -@@ -824,40 +837,54 @@ void vfio_reset_bytes_transferred(void) - - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { -- int ret = -ENOTSUP; -+ Error *err = NULL; -+ int ret; - -- if (!vbasedev->enable_migration) { -- goto add_blocker; -+ if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { -+ error_setg(&err, "%s: Migration is disabled for VFIO device", -+ vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); - } - - ret = vfio_migration_init(vbasedev); - if (ret) { -- goto add_blocker; -+ if (ret == -ENOTTY) { -+ error_setg(&err, "%s: VFIO migration is not supported in kernel", -+ vbasedev->name); -+ } else { -+ error_setg(&err, -+ "%s: Migration couldn't be initialized for VFIO device, " -+ "err: %d 
(%s)", -+ vbasedev->name, ret, strerror(-ret)); -+ } -+ -+ return vfio_block_migration(vbasedev, err, errp); -+ } -+ -+ if (!vbasedev->dirty_pages_supported) { -+ if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { -+ error_setg(&err, -+ "%s: VFIO device doesn't support device dirty tracking", -+ vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); -+ } -+ -+ warn_report("%s: VFIO device doesn't support device dirty tracking", -+ vbasedev->name); - } - -- ret = vfio_block_multiple_devices_migration(errp); -+ ret = vfio_block_multiple_devices_migration(vbasedev, errp); - if (ret) { - return ret; - } - -- ret = vfio_block_giommu_migration(errp); -+ ret = vfio_block_giommu_migration(vbasedev, errp); - if (ret) { - return ret; - } - -- trace_vfio_migration_probe(vbasedev->name); -+ trace_vfio_migration_realize(vbasedev->name); - return 0; -- --add_blocker: -- error_setg(&vbasedev->migration_blocker, -- "VFIO device doesn't support migration"); -- -- ret = migrate_add_blocker(vbasedev->migration_blocker, errp); -- if (ret < 0) { -- error_free(vbasedev->migration_blocker); -- vbasedev->migration_blocker = NULL; -- } -- return ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 15e7554954..6634945a70 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3371,8 +3371,8 @@ static Property vfio_pci_dev_properties[] = { - VFIO_FEATURE_ENABLE_REQ_BIT, true), - DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, - VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), -- DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, -- vbasedev.enable_migration, false), -+ DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, -+ vbasedev.enable_migration, ON_OFF_AUTO_AUTO), - DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), - DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, - vbasedev.ram_block_discard_allowed, false), -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events 
-index 4150b59e58..0391bd583b 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -155,7 +155,7 @@ vfio_load_cleanup(const char *name) " (%s)" - vfio_load_device_config_state(const char *name) " (%s)" - vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 - vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" --vfio_migration_probe(const char *name) " (%s)" -+vfio_migration_realize(const char *name) " (%s)" - vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" - vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" - vfio_save_block(const char *name, int data_size) " (%s) data_size %d" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 1d19c6f251..93429b9abb 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -139,7 +139,7 @@ typedef struct VFIODevice { - bool needs_reset; - bool no_mmap; - bool ram_block_discard_allowed; -- bool enable_migration; -+ OnOffAuto enable_migration; - VFIODeviceOps *ops; - unsigned int num_irqs; - unsigned int num_regions; -@@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - extern VFIOGroupList vfio_group_list; - - bool vfio_mig_active(void); --int vfio_block_multiple_devices_migration(Error **errp); -+int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); - void vfio_unblock_multiple_devices_migration(void); --int vfio_block_giommu_migration(Error **errp); -+int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); - int64_t vfio_mig_bytes_transferred(void); - void vfio_reset_bytes_transferred(void); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch b/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch deleted file mode 100644 index 3b61c5d..0000000 --- 
a/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch +++ /dev/null @@ -1,102 +0,0 @@ -From edcf24a08d66d620a10c746824e31d230c8516ce Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 13/37] vfio/migration: Refactor vfio_save_block() to return - saved data size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/28] b4aed6ddcbde159e98275a0675dcdf45d644673b (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit cf53efbbda2e -Author: Avihai Horon -Date: Wed Jun 21 14:11:58 2023 +0300 - - vfio/migration: Refactor vfio_save_block() to return saved data size - - Refactor vfio_save_block() to return the size of saved data on success - and -errno on error. - - This will be used in next patch to implement VFIO migration pre-copy - support. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Reviewed-by: Juan Quintela - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 17 +++++++++-------- - 1 file changed, 9 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 6b58dddb88..235978fd68 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -241,8 +241,8 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, - return 0; - } - --/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */ --static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) -+/* Returns the size of saved data on success and -errno on error */ -+static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - { - ssize_t data_size; - -@@ -252,7 +252,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) - return -errno; - } - if (data_size == 0) { -- return 1; -+ return 0; - } - - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); -@@ -262,7 +262,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) - - trace_vfio_save_block(migration->vbasedev->name, data_size); - -- return qemu_file_get_error(f); -+ return qemu_file_get_error(f) ?: data_size; - } - - /* ---------------------------------------------------------------------- */ -@@ -335,6 +335,7 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - { - VFIODevice *vbasedev = opaque; -+ ssize_t data_size; - int ret; - - /* We reach here with device state STOP only */ -@@ -345,11 +346,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - } - - do { -- ret = vfio_save_block(f, vbasedev->migration); -- if (ret < 0) { -- return ret; -+ data_size = vfio_save_block(f, vbasedev->migration); -+ if (data_size < 0) { -+ return data_size; - } -- } 
while (!ret); -+ } while (data_size); - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - ret = qemu_file_get_error(f); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch b/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch deleted file mode 100644 index ad3c6ca..0000000 --- a/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 5bb94c4eaeb94f0b41a57660098a4c12a295b725 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 28/37] vfio/migration: Remove print of "Migration disabled" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [26/28] c7ff1f9c90b4cfcb327ef474042ea71ea577a94d (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0520d63c7701 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:09 2023 +0800 - - vfio/migration: Remove print of "Migration disabled" - - Property enable_migration supports [on/off/auto]. - In ON mode, error pointer is passed to errp and logged. - In OFF mode, we doesn't need to log "Migration disabled" as it's intentional. - In AUTO mode, we should only ever see errors or warnings if the device - supports migration and an error or incompatibility occurs while further - probing or configuring it. Lack of support for migration shoundn't - generate an error or warning. 
- - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 037b7d4176..a60b868c38 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3233,7 +3233,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - if (!pdev->failover_pair_id) { - ret = vfio_migration_realize(vbasedev, errp); - if (ret) { -- error_report("%s: Migration disabled", vbasedev->name); - goto out_deregister; - } - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch b/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch deleted file mode 100644 index 2666460..0000000 --- a/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch +++ /dev/null @@ -1,165 +0,0 @@ -From a63b4010ba4f491c9144afff363bebcf35ecf496 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 20/37] vfio/migration: Reset bytes_transferred properly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [18/28] e9a70faeca4fd5aa7ef36502cf76bf0b62f65057 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 808642a2f640 -Author: Avihai Horon -Date: Wed Jun 28 10:31:11 2023 +0300 - - vfio/migration: Reset bytes_transferred properly - - Currently, VFIO bytes_transferred is not reset properly: - 1. bytes_transferred is not reset after a VM snapshot (so a migration - following a snapshot will report incorrect value). - 2. 
bytes_transferred is a single counter for all VFIO devices, however - upon migration failure it is reset multiple times, by each VFIO - device. - - Fix it by introducing a new function vfio_reset_bytes_transferred() and - calling it during migration and snapshot start. - - Remove existing bytes_transferred reset in VFIO migration state - notifier, which is not needed anymore. - - Fixes: 3710586caa5d ("qapi: Add VFIO devices migration stats in Migration stats") - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c - migration/savevm.c - context changes due to commit aff3f6606d14 ("migration: Rename - ram_counters to mig_stats") - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 6 +++++- - include/hw/vfio/vfio-common.h | 1 + - migration/migration.c | 1 + - migration/migration.h | 1 + - migration/savevm.c | 1 + - migration/target.c | 17 +++++++++++++++-- - 6 files changed, 24 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index acbf0bb7ab..7cf143926c 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -697,7 +697,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) - case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_CANCELLED: - case MIGRATION_STATUS_FAILED: -- bytes_transferred = 0; - /* - * If setting the device in RUNNING state fails, the device should - * be reset. To do so, use ERROR state as a recover state. 
-@@ -818,6 +817,11 @@ int64_t vfio_mig_bytes_transferred(void) - return bytes_transferred; - } - -+void vfio_reset_bytes_transferred(void) -+{ -+ bytes_transferred = 0; -+} -+ - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { - int ret = -ENOTSUP; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 6d1b8487c3..1d19c6f251 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -229,6 +229,7 @@ int vfio_block_multiple_devices_migration(Error **errp); - void vfio_unblock_multiple_devices_migration(void); - int vfio_block_giommu_migration(Error **errp); - int64_t vfio_mig_bytes_transferred(void); -+void vfio_reset_bytes_transferred(void); - - #ifdef CONFIG_LINUX - int vfio_get_region_info(VFIODevice *vbasedev, int index, -diff --git a/migration/migration.c b/migration/migration.c -index 9bf1caee6c..47ad6c43cb 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1638,6 +1638,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - */ - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, sizeof(compression_counters)); -+ reset_vfio_bytes_transferred(); - - return true; - } -diff --git a/migration/migration.h b/migration/migration.h -index e9679f8029..7ccf460aa2 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -495,6 +495,7 @@ bool migration_rate_limit(void); - void migration_cancel(const Error *error); - - void populate_vfio_info(MigrationInfo *info); -+void reset_vfio_bytes_transferred(void); - void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); - - #endif -diff --git a/migration/savevm.c b/migration/savevm.c -index aff70e6263..83088fc3f8 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1620,6 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - migrate_init(ms); - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, 
sizeof(compression_counters)); -+ reset_vfio_bytes_transferred(); - ms->to_dst_file = f; - - qemu_mutex_unlock_iothread(); -diff --git a/migration/target.c b/migration/target.c -index 00ca007f97..f39c9a8d88 100644 ---- a/migration/target.c -+++ b/migration/target.c -@@ -14,12 +14,25 @@ - #include "hw/vfio/vfio-common.h" - #endif - -+#ifdef CONFIG_VFIO - void populate_vfio_info(MigrationInfo *info) - { --#ifdef CONFIG_VFIO - if (vfio_mig_active()) { - info->vfio = g_malloc0(sizeof(*info->vfio)); - info->vfio->transferred = vfio_mig_bytes_transferred(); - } --#endif - } -+ -+void reset_vfio_bytes_transferred(void) -+{ -+ vfio_reset_bytes_transferred(); -+} -+#else -+void populate_vfio_info(MigrationInfo *info) -+{ -+} -+ -+void reset_vfio_bytes_transferred(void) -+{ -+} -+#endif --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch b/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch deleted file mode 100644 index efd42a9..0000000 --- a/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 223eef8363c9ba58514b2d4f93e5ff015d111ff2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 29/37] vfio/migration: Return bool type for - vfio_migration_realize() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [27/28] d5aea3ea4c53e4573076cbacbbe3134f9f0f9e53 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit d4a2af747d5a -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:10 2023 +0800 - - vfio/migration: Return bool type for vfio_migration_realize() - - Make vfio_migration_realize() adhere to the convention of other realize() - callbacks(like 
qdev_realize) by returning bool instead of int. - - Suggested-by: Cédric Le Goater - Suggested-by: Joao Martins - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 15 ++++++++++----- - hw/vfio/pci.c | 3 +-- - include/hw/vfio/vfio-common.h | 2 +- - 3 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index e3954570c8..2674f4bc47 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -846,7 +846,12 @@ void vfio_reset_bytes_transferred(void) - bytes_transferred = 0; - } - --int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) -+/* -+ * Return true when either migration initialized or blocker registered. -+ * Currently only return false when adding blocker fails which will -+ * de-register vfio device. -+ */ -+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { - Error *err = NULL; - int ret; -@@ -854,7 +859,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { - error_setg(&err, "%s: Migration is disabled for VFIO device", - vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ return !vfio_block_migration(vbasedev, err, errp); - } - - ret = vfio_migration_init(vbasedev); -@@ -869,7 +874,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - vbasedev->name, ret, strerror(-ret)); - } - -- return vfio_block_migration(vbasedev, err, errp); -+ return !vfio_block_migration(vbasedev, err, errp); - } - - if (!vbasedev->dirty_pages_supported) { -@@ -896,7 +901,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - } - - trace_vfio_migration_realize(vbasedev->name); -- return 0; -+ return true; - - add_blocker: - ret = vfio_block_migration(vbasedev, err, errp); -@@ -904,7 +909,7 @@ out_deinit: - if (ret) { - 
vfio_migration_deinit(vbasedev); - } -- return ret; -+ return !ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index a60b868c38..ba40ca8784 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3231,8 +3231,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - } - - if (!pdev->failover_pair_id) { -- ret = vfio_migration_realize(vbasedev, errp); -- if (ret) { -+ if (!vfio_migration_realize(vbasedev, errp)) { - goto out_deregister; - } - } -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 45167c8a8a..da43d27352 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -252,7 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container, - int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space); - --int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); -+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); - - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch b/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch deleted file mode 100644 index 6211db7..0000000 --- a/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 76208f7824d5139ac8d86140b0e01031b67638cc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 04/37] vfio/migration: Skip log_sync during migration SETUP - state -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/28] 4c340992b472ac4627b57705f4e971f14bbb0846 (clegoate/qemu-kvm-c9s) - 
-Bugzilla: https://bugzilla.redhat.com/2192818 - -commit ff180c6bd7a8 -Author: Avihai Horon -Date: Mon Apr 3 16:00:00 2023 +0300 - - vfio/migration: Skip log_sync during migration SETUP state - - Currently, VFIO log_sync can be issued while migration is in SETUP - state. However, doing this log_sync is at best redundant and at worst - can fail. - - Redundant -- all RAM is marked dirty in migration SETUP state and is - transferred only after migration is set to ACTIVE state, so doing - log_sync during migration SETUP is pointless. - - Can fail -- there is a time window, between setting migration state to - SETUP and starting dirty tracking by RAM save_live_setup handler, during - which dirty tracking is still not started. Any VFIO log_sync call that - is issued during this time window will fail. For example, this error can - be triggered by migrating a VM when a GUI is active, which constantly - calls log_sync. - - Fix it by skipping VFIO log_sync while migration is in SETUP state. - - Fixes: 758b96b61d5c ("vfio/migrate: Move switch of dirty tracking into vfio_memory_listener") - Signed-off-by: Avihai Horon - Link: https://lore.kernel.org/r/20230403130000.6422-1-avihaih@nvidia.com - Signed-off-by: Alex Williamson - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 4d01ea3515..78358ede27 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -478,7 +478,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - -- if (!migration_is_setup_or_active(ms->state)) { -+ if (ms->state != MIGRATION_STATUS_ACTIVE && -+ ms->state != MIGRATION_STATUS_DEVICE) { - return false; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch b/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch deleted file mode 
100644 index 2db8511..0000000 --- a/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 77353cdafd08562dff9c99e9f3984d12224bee52 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 14/37] vfio/migration: Store VFIO migration flags in - VFIOMigration -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/28] 31a9c39e6ee6338a35dc08c3e7f5c1a204166249 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 6cd1fe11598a -Author: Avihai Horon -Date: Wed Jun 21 14:11:59 2023 +0300 - - vfio/migration: Store VFIO migration flags in VFIOMigration - - VFIO migration flags are queried once in vfio_migration_init(). Store - them in VFIOMigration so they can be used later to check the device's - migration capabilities without re-querying them. - - This will be used in the next patch to check if the device supports - precopy migration. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 1 + - include/hw/vfio/vfio-common.h | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 235978fd68..8d33414379 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -603,6 +603,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) - migration->vbasedev = vbasedev; - migration->device_state = VFIO_DEVICE_STATE_RUNNING; - migration->data_fd = -1; -+ migration->mig_flags = mig_flags; - - vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev); - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index eed244f25f..5f29dab839 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -66,6 +66,7 @@ typedef struct VFIOMigration { - int data_fd; - void *data_buffer; - size_t data_buffer_size; -+ uint64_t mig_flags; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch b/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch new file mode 100644 index 0000000..f79de18 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch @@ -0,0 +1,81 @@ +From 7788fdc2375e01ead0c8a705c3b3d7467dd93d67 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 21 Nov 2023 16:44:09 +0800 +Subject: [PATCH 030/101] vfio/pci: Allow the selection of a given iommu + backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [29/67] 363c62607a11093ea0062489e11a708117d8ffb9 
(eauger1/centos-qemu-kvm) + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-pci device: + +If the user wants to use the legacy backend, it shall not +link the vfio-pci device with any iommufd object: + + -device vfio-pci,host=0000:02:00.0 + +This is called the legacy mode/backend. + +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-pci device options: + + -object iommufd,id=iommufd0 + -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit ee42b261b0a2e465ae003ddcaf1caf117c201f74) +Signed-off-by: Eric Auger +--- + hw/vfio/pci.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 83b2561908..39e6a6678e 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #include + +@@ -42,6 +43,7 @@ + #include "qapi/error.h" + #include "migration/blocker.h" + #include "migration/qemu-file.h" ++#include "sysemu/iommufd.h" + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + +@@ -3415,6 +3417,10 @@ static Property vfio_pci_dev_properties[] = { + * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), + * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), + */ ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch 
b/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch deleted file mode 100644 index b5d9d37..0000000 --- a/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch +++ /dev/null @@ -1,67 +0,0 @@ -From b5a69101abac153c9c9be7f539d810e3e4af3bdf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 19/37] vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI - retry path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [17/28] 2067bb58f3a2c1a793e5566cee3c78a8299c9c1c (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit c17408892319 -Author: Shameer Kolothum -Date: Tue Jun 13 15:09:43 2023 +0100 - - vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI retry path - - When vfio_enable_vectors() returns with less than requested nr_vectors - we retry with what kernel reported back. 
But the retry path doesn't - call vfio_prepare_kvm_msi_virq_batch() and this results in, - - qemu-system-aarch64: vfio: Error: Failed to enable 4 MSI vectors, retry with 1 - qemu-system-aarch64: ../hw/vfio/pci.c:602: vfio_commit_kvm_msi_virq_batch: Assertion `vdev->defer_kvm_irq_routing' failed - - Fixes: dc580d51f7dd ("vfio: defer to commit kvm irq routing when enable msi/msix") - Reviewed-by: Longpeng - Signed-off-by: Shameer Kolothum - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 7c5e2b5996..15e7554954 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -666,6 +666,8 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) - - vfio_disable_interrupts(vdev); - -+ vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); -+retry: - /* - * Setting vector notifiers needs to enable route for each vector. - * Deferring to commit the KVM routes once rather than per vector -@@ -673,8 +675,6 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) - */ - vfio_prepare_kvm_msi_virq_batch(vdev); - -- vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); --retry: - vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); - - for (i = 0; i < vdev->nr_vectors; i++) { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch b/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch new file mode 100644 index 0000000..837e490 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch @@ -0,0 +1,69 @@ +From 43236995e8ad336d366b625fb8362046be53fc34 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 29 Jan 2024 09:46:34 +0100 +Subject: [PATCH] vfio/pci: Clear MSI-X IRQ index always +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 218: 
vfio/pci: Clear MSI-X IRQ index always +RH-Jira: RHEL-21293 +RH-Acked-by: Eric Auger +RH-Acked-by: Alex Williamson +RH-Commit: [1/1] b4b587b13c11e350d3e5fcc11ba66a006b25a763 (clegoate/qemu-kvm-c9s) + +JIRA: https://issues.redhat.com/browse/RHEL-21293 + +commit d2b668fca5652760b435ce812a743bba03d2f316 +Author: Cédric Le Goater +Date: Thu Jan 25 14:27:36 2024 +0100 + + vfio/pci: Clear MSI-X IRQ index always + + When doing device assignment of a physical device, MSI-X can be + enabled with no vectors enabled and this sets the IRQ index to + VFIO_PCI_MSIX_IRQ_INDEX. However, when MSI-X is disabled, the IRQ + index is left untouched if no vectors are in use. Then, when INTx + is enabled, the IRQ index value is considered incompatible (set to + MSI-X) and VFIO_DEVICE_SET_IRQS fails. QEMU complains with : + + qemu-system-x86_64: vfio 0000:08:00.0: Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument + + To avoid that, unconditionally clear the IRQ index when MSI-X is + disabled. + + Buglink: https://issues.redhat.com/browse/RHEL-21293 + Fixes: 5ebffa4e87e7 ("vfio/pci: use an invalid fd to enable MSI-X") + Cc: Jing Liu + Cc: Alex Williamson + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index adb7c09367..29bb8067eb 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -829,9 +829,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) + } + } + +- if (vdev->nr_vectors) { +- vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); +- } ++ /* ++ * Always clear MSI-X IRQ index. A PF device could have enabled ++ * MSI-X with no vectors. See vfio_msix_enable(). 
++ */ ++ vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); + + vfio_msi_disable_common(vdev); + vfio_intx_enable(vdev, &err); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch b/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch deleted file mode 100644 index 0aca4ef..0000000 --- a/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 816c20b23546d31316c9ca450db8a6668ac6216c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 25/37] vfio/pci: Disable INTx in vfio_realize error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [23/28] 2fde4bad00c4286e6bbe24947c2bfd6468fc0ff3 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit adee0da0368f -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:06 2023 +0800 - - vfio/pci: Disable INTx in vfio_realize error path - - When vfio realize fails, INTx isn't disabled if it has been enabled. - This may confuse host side with unhandled interrupt report. 
- - Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 87bd440504..2d059832a4 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3244,6 +3244,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - return; - - out_deregister: -+ if (vdev->interrupt == VFIO_INT_INTx) { -+ vfio_intx_disable(vdev); -+ } - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); - if (vdev->irqchip_change_notifier.notify) { - kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch b/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch new file mode 100644 index 0000000..af6593c --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch @@ -0,0 +1,139 @@ +From fe5ecedd452754eeb238b23eb0544ed3c5086157 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:06 +0800 +Subject: [PATCH 027/101] vfio/pci: Extract out a helper + vfio_pci_get_pci_hot_reset_info +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [26/67] 730b7f1496f4f21310fa13c79cb87f8d5e2ad2a8 (eauger1/centos-qemu-kvm) + +This helper will be used by both legacy and iommufd backends. + +No functional changes intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 4d36ec23a75eb387492f4d68ff1b8eeee5d68142) +Signed-off-by: Eric Auger +--- + hw/vfio/pci.c | 54 +++++++++++++++++++++++++++++++++++---------------- + hw/vfio/pci.h | 3 +++ + 2 files changed, 40 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index ec98080f28..b482e5479f 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2448,22 +2448,13 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) + return (strcmp(tmp, name) == 0); + } + +-static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) ++int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, ++ struct vfio_pci_hot_reset_info **info_p) + { +- VFIOGroup *group; + struct vfio_pci_hot_reset_info *info; +- struct vfio_pci_dependent_device *devices; +- struct vfio_pci_hot_reset *reset; +- int32_t *fds; +- int ret, i, count; +- bool multi = false; ++ int ret, count; + +- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); +- +- if (!single) { +- vfio_pci_pre_reset(vdev); +- } +- vdev->vbasedev.needs_reset = false; ++ assert(info_p && !*info_p); + + info = g_malloc0(sizeof(*info)); + info->argsz = sizeof(*info); +@@ -2471,24 +2462,53 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); + if (ret && errno != ENOSPC) { + ret = -errno; ++ g_free(info); + if (!vdev->has_pm_reset) { + error_report("vfio: Cannot reset device %s, " + "no available reset mechanism.", vdev->vbasedev.name); + } +- goto out_single; ++ return ret; + } + + count = info->count; +- info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices))); +- info->argsz = sizeof(*info) + (count * sizeof(*devices)); +- devices = &info->devices[0]; ++ info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0]))); ++ info->argsz = sizeof(*info) + (count * sizeof(info->devices[0])); + + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); + if (ret) { + ret = -errno; ++ g_free(info); + error_report("vfio: hot reset info failed: %m"); ++ return ret; ++ } ++ ++ *info_p = info; ++ return 0; ++} ++ ++static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) ++{ ++ VFIOGroup *group; ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int32_t *fds; ++ int ret, i, count; ++ bool multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { + goto out_single; + } ++ devices = &info->devices[0]; + + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); + +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index eb74d9de2d..3568a6135d 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); + + extern const PropertyInfo qdev_prop_nv_gpudirect_clique; + ++int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, ++ struct vfio_pci_hot_reset_info **info_p); ++ + int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); + + int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch b/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch deleted file mode 100644 index d05d114..0000000 --- a/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 0b1ab3aacc02e70bfe8440236eb9def426bbe10e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 22/37] vfio/pci: Fix a segfault in vfio_realize -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [20/28] 48b9c1efe295c2672693d9c99f6d11738d2b98d1 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 357bd7932a13 -Author: Zhenzhong Duan -Date: Thu Jun 29 16:40:38 2023 +0800 - - vfio/pci: Fix a segfault in vfio_realize - - The kvm irqchip notifier is only registered if the device supports - INTx, however it's unconditionally removed in vfio realize error - path. 
If the assigned device does not support INTx, this will cause - QEMU to crash when vfio realize fails. Change it to conditionally - remove the notifier only if the notify hook is setup. - - Before fix: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 - Connection closed by foreign host. - - After fix: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 - Error: vfio 0000:81:11.1: xres and yres properties require display=on - (qemu) - - Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6634945a70..d08e6c1a20 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3245,7 +3245,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - - out_deregister: - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); -- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ if (vdev->irqchip_change_notifier.notify) { -+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ } - out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch b/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch deleted file mode 100644 index 1fa725f..0000000 --- a/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 2437a06ff137c4bc856df096e42407c1f50b25b0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 06/37] vfio/pci: Fix a use-after-free issue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration 
support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/28] eca69a7e0a6fb8c1c70be8b91209a53b040e30ba (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit b83b40b61484 -Author: Zhenzhong Duan -Date: Wed May 17 10:46:51 2023 +0800 - - vfio/pci: Fix a use-after-free issue - - vbasedev->name is freed wrongly which leads to garbage VFIO trace log. - Fix it by allocating a dup of vbasedev->name and then free the dup. - - Fixes: 2dca1b37a760 ("vfio/pci: add support for VF token") - Suggested-by: Alex Williamson - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Matthew Rosato - Acked-by: Alex Williamson - Reviewed-by: Philippe Mathieu-Daudé - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6cd3a98c39..7c5e2b5996 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3018,7 +3018,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - qemu_uuid_unparse(&vdev->vf_token, uuid); - name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); - } else { -- name = vbasedev->name; -+ name = g_strdup(vbasedev->name); - } - - ret = vfio_get_device(group, name, vbasedev, errp); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch b/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch deleted file mode 100644 index 3978b96..0000000 --- a/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 9c5016c9b3f9cf66d1b531de829e8b5010962695 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 23/37] vfio/pci: Free leaked timer in vfio_realize error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [21/28] dbaae4e484de4613f7f7735be519b7357627326e (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0cc889c8826c -Author: Zhenzhong Duan -Date: Thu Jun 29 16:40:39 2023 +0800 - - vfio/pci: Free leaked timer in vfio_realize error path - - When vfio_realize fails, the mmap_timer used for INTx optimization - isn't freed. As this timer isn't activated yet, the potential impact - is just a piece of leaked memory. - - Fixes: ea486926b07d ("vfio-pci: Update slow path INTx algorithm timer related") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d08e6c1a20..87bd440504 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3248,6 +3248,9 @@ out_deregister: - if (vdev->irqchip_change_notifier.notify) { - kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); - } -+ if (vdev->intx.mmap_timer) { -+ timer_free(vdev->intx.mmap_timer); -+ } - out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch b/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch new file mode 100644 index 0000000..2a2db5f --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch @@ -0,0 +1,466 @@ +From acc3e5306e184567006bc45e7f36f2473e75d08a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:07 +0800 +Subject: [PATCH 028/101] vfio/pci: Introduce a vfio pci hot reset interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger 
+RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [27/67] 192088dbf2cf88663acd2416f69b7eeb175b2525 (eauger1/centos-qemu-kvm) + +Legacy vfio pci and iommufd cdev have different process to hot reset +vfio device, expand current code to abstract out pci_hot_reset callback +for legacy vfio, this same interface will also be used by iommufd +cdev vfio device. + +Rename vfio_pci_hot_reset to vfio_legacy_pci_hot_reset and move it +into container.c. + +vfio_pci_[pre/post]_reset and vfio_pci_host_match are exported so +they could be called in legacy and iommufd pci_hot_reset callback. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit c328e7e8ad1c969dbcbe90ee76afcd3cfec5e945) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 170 ++++++++++++++++++++++++++ + hw/vfio/pci.c | 168 +------------------------ + hw/vfio/pci.h | 3 + + include/hw/vfio/vfio-container-base.h | 3 + + 4 files changed, 182 insertions(+), 162 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index ed2d721b2b..1dbf9b9a17 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -33,6 +33,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "migration/migration.h" ++#include "pci.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -922,6 +923,174 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) + vfio_put_group(group); + } + ++static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ VFIOGroup *group; ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int32_t *fds; ++ int ret, i, count; ++ bool 
multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { ++ goto out_single; ++ } ++ devices = &info->devices[0]; ++ ++ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); ++ ++ /* Verify that we have all the groups required */ ++ for (i = 0; i < info->count; i++) { ++ PCIHostDeviceAddress host; ++ VFIOPCIDevice *tmp; ++ VFIODevice *vbasedev_iter; ++ ++ host.domain = devices[i].segment; ++ host.bus = devices[i].bus; ++ host.slot = PCI_SLOT(devices[i].devfn); ++ host.function = PCI_FUNC(devices[i].devfn); ++ ++ trace_vfio_pci_hot_reset_dep_devices(host.domain, ++ host.bus, host.slot, host.function, devices[i].group_id); ++ ++ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { ++ continue; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ if (group->groupid == devices[i].group_id) { ++ break; ++ } ++ } ++ ++ if (!group) { ++ if (!vdev->has_pm_reset) { ++ error_report("vfio: Cannot reset device %s, " ++ "depends on group %d which is not owned.", ++ vdev->vbasedev.name, devices[i].group_id); ++ } ++ ret = -EPERM; ++ goto out; ++ } ++ ++ /* Prep dependent devices for reset and clear our marker. 
*/ ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ if (!vbasedev_iter->dev->realized || ++ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { ++ continue; ++ } ++ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); ++ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { ++ if (single) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ vfio_pci_pre_reset(tmp); ++ tmp->vbasedev.needs_reset = false; ++ multi = true; ++ break; ++ } ++ } ++ } ++ ++ if (!single && !multi) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ ++ /* Determine how many group fds need to be passed */ ++ count = 0; ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ for (i = 0; i < info->count; i++) { ++ if (group->groupid == devices[i].group_id) { ++ count++; ++ break; ++ } ++ } ++ } ++ ++ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); ++ reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); ++ fds = &reset->group_fds[0]; ++ ++ /* Fill in group fds */ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ for (i = 0; i < info->count; i++) { ++ if (group->groupid == devices[i].group_id) { ++ fds[reset->count++] = group->fd; ++ break; ++ } ++ } ++ } ++ ++ /* Bus reset! */ ++ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); ++ g_free(reset); ++ if (ret) { ++ ret = -errno; ++ } ++ ++ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, ++ ret ? 
strerror(errno) : "Success"); ++ ++out: ++ /* Re-enable INTx on affected devices */ ++ for (i = 0; i < info->count; i++) { ++ PCIHostDeviceAddress host; ++ VFIOPCIDevice *tmp; ++ VFIODevice *vbasedev_iter; ++ ++ host.domain = devices[i].segment; ++ host.bus = devices[i].bus; ++ host.slot = PCI_SLOT(devices[i].devfn); ++ host.function = PCI_FUNC(devices[i].devfn); ++ ++ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { ++ continue; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ if (group->groupid == devices[i].group_id) { ++ break; ++ } ++ } ++ ++ if (!group) { ++ break; ++ } ++ ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ if (!vbasedev_iter->dev->realized || ++ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { ++ continue; ++ } ++ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); ++ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { ++ vfio_pci_post_reset(tmp); ++ break; ++ } ++ } ++ } ++out_single: ++ if (!single) { ++ vfio_pci_post_reset(vdev); ++ } ++ g_free(info); ++ ++ return ret; ++} ++ + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, +@@ -929,4 +1098,5 @@ const VFIOIOMMUOps vfio_legacy_ops = { + .detach_device = vfio_legacy_detach_device, + .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, + .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, ++ .pci_hot_reset = vfio_legacy_pci_hot_reset, + }; +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index b482e5479f..83b2561908 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2377,7 +2377,7 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) + return 0; + } + +-static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) ++void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + { + PCIDevice *pdev = &vdev->pdev; + uint16_t cmd; +@@ -2414,7 +2414,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); + } + +-static void 
vfio_pci_post_reset(VFIOPCIDevice *vdev) ++void vfio_pci_post_reset(VFIOPCIDevice *vdev) + { + Error *err = NULL; + int nr; +@@ -2438,7 +2438,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) + vfio_quirk_reset(vdev); + } + +-static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) ++bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) + { + char tmp[13]; + +@@ -2488,166 +2488,10 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + + static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + { +- VFIOGroup *group; +- struct vfio_pci_hot_reset_info *info = NULL; +- struct vfio_pci_dependent_device *devices; +- struct vfio_pci_hot_reset *reset; +- int32_t *fds; +- int ret, i, count; +- bool multi = false; +- +- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); +- +- if (!single) { +- vfio_pci_pre_reset(vdev); +- } +- vdev->vbasedev.needs_reset = false; +- +- ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); +- +- if (ret) { +- goto out_single; +- } +- devices = &info->devices[0]; +- +- trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); +- +- /* Verify that we have all the groups required */ +- for (i = 0; i < info->count; i++) { +- PCIHostDeviceAddress host; +- VFIOPCIDevice *tmp; +- VFIODevice *vbasedev_iter; +- +- host.domain = devices[i].segment; +- host.bus = devices[i].bus; +- host.slot = PCI_SLOT(devices[i].devfn); +- host.function = PCI_FUNC(devices[i].devfn); +- +- trace_vfio_pci_hot_reset_dep_devices(host.domain, +- host.bus, host.slot, host.function, devices[i].group_id); +- +- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { +- continue; +- } +- +- QLIST_FOREACH(group, &vfio_group_list, next) { +- if (group->groupid == devices[i].group_id) { +- break; +- } +- } +- +- if (!group) { +- if (!vdev->has_pm_reset) { +- error_report("vfio: Cannot reset device %s, " +- "depends on group %d which is not owned.", +- vdev->vbasedev.name, devices[i].group_id); +- 
} +- ret = -EPERM; +- goto out; +- } +- +- /* Prep dependent devices for reset and clear our marker. */ +- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { +- if (!vbasedev_iter->dev->realized || +- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { +- continue; +- } +- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); +- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { +- if (single) { +- ret = -EINVAL; +- goto out_single; +- } +- vfio_pci_pre_reset(tmp); +- tmp->vbasedev.needs_reset = false; +- multi = true; +- break; +- } +- } +- } +- +- if (!single && !multi) { +- ret = -EINVAL; +- goto out_single; +- } +- +- /* Determine how many group fds need to be passed */ +- count = 0; +- QLIST_FOREACH(group, &vfio_group_list, next) { +- for (i = 0; i < info->count; i++) { +- if (group->groupid == devices[i].group_id) { +- count++; +- break; +- } +- } +- } +- +- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); +- reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); +- fds = &reset->group_fds[0]; +- +- /* Fill in group fds */ +- QLIST_FOREACH(group, &vfio_group_list, next) { +- for (i = 0; i < info->count; i++) { +- if (group->groupid == devices[i].group_id) { +- fds[reset->count++] = group->fd; +- break; +- } +- } +- } +- +- /* Bus reset! */ +- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); +- g_free(reset); +- +- trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, +- ret ? 
strerror(errno) : "Success"); +- +-out: +- /* Re-enable INTx on affected devices */ +- for (i = 0; i < info->count; i++) { +- PCIHostDeviceAddress host; +- VFIOPCIDevice *tmp; +- VFIODevice *vbasedev_iter; +- +- host.domain = devices[i].segment; +- host.bus = devices[i].bus; +- host.slot = PCI_SLOT(devices[i].devfn); +- host.function = PCI_FUNC(devices[i].devfn); +- +- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { +- continue; +- } +- +- QLIST_FOREACH(group, &vfio_group_list, next) { +- if (group->groupid == devices[i].group_id) { +- break; +- } +- } +- +- if (!group) { +- break; +- } +- +- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { +- if (!vbasedev_iter->dev->realized || +- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { +- continue; +- } +- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); +- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { +- vfio_pci_post_reset(tmp); +- break; +- } +- } +- } +-out_single: +- if (!single) { +- vfio_pci_post_reset(vdev); +- } +- g_free(info); ++ VFIODevice *vbasedev = &vdev->vbasedev; ++ const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; + +- return ret; ++ return ops->pci_hot_reset(vbasedev, single); + } + + /* +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 3568a6135d..b7de39c010 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); + + extern const PropertyInfo qdev_prop_nv_gpudirect_clique; + ++void vfio_pci_pre_reset(VFIOPCIDevice *vdev); ++void vfio_pci_post_reset(VFIOPCIDevice *vdev); ++bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name); + int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + struct vfio_pci_hot_reset_info **info_p); + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 4b6f017c6f..45bb19c767 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -106,6 +106,9 @@ struct 
VFIOIOMMUOps { + int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); + int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); ++ /* PCI specific */ ++ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); ++ + /* SPAPR specific */ + int (*add_window)(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch new file mode 100644 index 0000000..4a973b5 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch @@ -0,0 +1,237 @@ +From 965a44793806fef2094906947bd3b428638bf89a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:10 +0800 +Subject: [PATCH 031/101] vfio/pci: Make vfio cdev pre-openable by passing a + file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [30/67] a14b824b700e8fb36633cd159bcc422d992a316f (eauger1/centos-qemu-kvm) + +Conflicts: contextual conflict in hw/vfio/pci.c due to +RHEL-only f73562144e492 vfio: cap number of devices that can be assigned + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. + +Together with the earlier support of pre-opening /dev/iommu device, +now we have full support of passing a vfio device to unprivileged +qemu by management tool. This mode is no more considered for the +legacy backend. So let's remove the "TODO" comment. 
+ +Add helper functions vfio_device_set_fd() and vfio_device_get_name() +to set fd and get device name, they will also be used by other vfio +devices. + +There is no easy way to check if a device is mdev with FD passing, +so fail the x-balloon-allowed check unconditionally in this case. + +There is also no easy way to get BDF as name with FD passing, so +we fake a name by VFIO_FD[fd]. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit da3e04b26fd8d15b344944504d5ffa9c5f20b54b) +Signed-off-by: Eric Auger +--- + hw/vfio/helpers.c | 43 +++++++++++++++++++++++++++++++++++ + hw/vfio/iommufd.c | 12 ++++++---- + hw/vfio/pci.c | 28 +++++++++++++---------- + include/hw/vfio/vfio-common.h | 4 ++++ + 4 files changed, 71 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 168847e7c5..3592c3d54e 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -27,6 +27,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "qemu/error-report.h" ++#include "monitor/monitor.h" + + /* + * Common VFIO interrupt disable +@@ -609,3 +610,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) + + return ret; + } ++ ++int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) ++{ ++ struct stat st; ++ ++ if (vbasedev->fd < 0) { ++ if (stat(vbasedev->sysfsdev, &st) < 0) { ++ error_setg_errno(errp, errno, "no such host device"); ++ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); ++ return -errno; ++ } ++ /* User may specify a name, e.g: VFIO platform device */ ++ if (!vbasedev->name) { ++ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); ++ } ++ } else { ++ if (!vbasedev->iommufd) { ++ error_setg(errp, "Use FD passing only with iommufd backend"); ++ return -EINVAL; ++ } ++ /* ++ * Give a name with fd so any function printing out vbasedev->name ++ * will not break. 
++ */ ++ if (!vbasedev->name) { ++ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); ++ } ++ } ++ ++ return 0; ++} ++ ++void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) ++{ ++ int fd = monitor_fd_param(monitor_cur(), str, errp); ++ ++ if (fd < 0) { ++ error_prepend(errp, "Could not parse remote object fd %s:", str); ++ return; ++ } ++ vbasedev->fd = fd; ++} +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 6e53e013ef..5accd26484 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -320,11 +320,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + uint32_t ioas_id; + Error *err = NULL; + +- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); +- if (devfd < 0) { +- return devfd; ++ if (vbasedev->fd < 0) { ++ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); ++ if (devfd < 0) { ++ return devfd; ++ } ++ vbasedev->fd = devfd; ++ } else { ++ devfd = vbasedev->fd; + } +- vbasedev->fd = devfd; + + ret = iommufd_cdev_connect_and_bind(vbasedev, errp); + if (ret) { +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 39e6a6678e..3412a63bb1 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2949,7 +2949,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + VFIOGroup *group; + char *tmp, *subsys; + Error *err = NULL; +- struct stat st; + int ret, i = 0; + bool is_mdev; + char uuid[UUID_STR_LEN]; +@@ -2976,11 +2975,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + return; + } + +- if (!vbasedev->sysfsdev) { ++ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { + error_setg(errp, "No provided host device"); + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " ++#ifdef CONFIG_IOMMUFD ++ "or -device vfio-pci,fd=DEVICE_FD " ++#endif + "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); + return; + } +@@ -2990,13 +2992,9 @@ static void vfio_realize(PCIDevice *pdev, Error 
**errp) + vdev->host.slot, vdev->host.function); + } + +- if (stat(vbasedev->sysfsdev, &st) < 0) { +- error_setg_errno(errp, errno, "no such host device"); +- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); ++ if (vfio_device_get_name(vbasedev, errp) < 0) { + return; + } +- +- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); + vbasedev->ops = &vfio_pci_ops; + vbasedev->type = VFIO_DEVICE_TYPE_PCI; + vbasedev->dev = DEVICE(vdev); +@@ -3356,6 +3354,7 @@ static void vfio_instance_init(Object *obj) + vdev->host.bus = ~0U; + vdev->host.slot = ~0U; + vdev->host.function = ~0U; ++ vdev->vbasedev.fd = -1; + + vdev->nv_gpudirect_clique = 0xFF; + +@@ -3412,11 +3411,6 @@ static Property vfio_pci_dev_properties[] = { + qdev_prop_nv_gpudirect_clique, uint8_t), + DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, + OFF_AUTOPCIBAR_OFF), +- /* +- * TODO - support passed fds... is this necessary? +- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), +- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), +- */ + #ifdef CONFIG_IOMMUFD + DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, + TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), +@@ -3424,6 +3418,13 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++#ifdef CONFIG_IOMMUFD ++static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); ++} ++#endif ++ + static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -3431,6 +3432,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + + dc->reset = vfio_pci_reset; + device_class_set_props(dc, vfio_pci_dev_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); ++#endif + dc->desc = "VFIO-based PCI device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pdc->realize 
= vfio_realize; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 3dac5c167e..697bf24a35 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -251,4 +251,8 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + hwaddr size); + int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr); ++ ++/* Returns 0 on success, or a negative errno. */ ++int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); ++void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch b/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch new file mode 100644 index 0000000..d426ede --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch @@ -0,0 +1,70 @@ +From 942bd7251d166f558e0e6acf7ba853e940e2fb52 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:21 +0800 +Subject: [PATCH 042/101] vfio/pci: Move VFIODevice initializations in + vfio_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [41/67] 67392d7a92a6ec2155697a355c88d295338a0785 (eauger1/centos-qemu-kvm) + +Some of the VFIODevice initializations is in vfio_realize, +move all of them in vfio_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit dd2fcb1716be9b89c726b3446f38446bb99d6b3a) +Signed-off-by: Eric Auger +--- + hw/vfio/pci.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 3412a63bb1..3f5900cc46 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2995,9 +2995,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + if (vfio_device_get_name(vbasedev, errp) < 0) { + return; + } +- vbasedev->ops = &vfio_pci_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_PCI; +- vbasedev->dev = DEVICE(vdev); + + /* + * Mediated devices *might* operate compatibly with discarding of RAM, but +@@ -3346,6 +3343,7 @@ static void vfio_instance_init(Object *obj) + { + PCIDevice *pci_dev = PCI_DEVICE(obj); + VFIOPCIDevice *vdev = VFIO_PCI(obj); ++ VFIODevice *vbasedev = &vdev->vbasedev; + + device_add_bootindex_property(obj, &vdev->bootindex, + "bootindex", NULL, +@@ -3354,7 +3352,11 @@ static void vfio_instance_init(Object *obj) + vdev->host.bus = ~0U; + vdev->host.slot = ~0U; + vdev->host.function = ~0U; +- vdev->vbasedev.fd = -1; ++ ++ vbasedev->type = VFIO_DEVICE_TYPE_PCI; ++ vbasedev->ops = &vfio_pci_ops; ++ vbasedev->dev = DEVICE(vdev); ++ vbasedev->fd = -1; + + vdev->nv_gpudirect_clique = 0xFF; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch b/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch deleted file mode 100644 index d937140..0000000 --- a/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch +++ /dev/null @@ -1,141 +0,0 @@ -From db53345dba5682c3ba0bc3fc596b30a98dadb88f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 05/37] vfio/pci: Static Resizable BAR capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/28] 42e9f4b517eb919c77c6fdbe771d9d05a91955bd (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit b5048a4cbfa0 -Author: Alex Williamson -Date: Thu May 4 14:42:48 2023 -0600 - - vfio/pci: Static Resizable BAR capability - - The PCI Resizable BAR (ReBAR) capability is currently hidden from the - VM because the protocol for interacting with the capability does not - support a mechanism for the device to reject an advertised supported - BAR size. However, when assigned to a VM, the act of resizing the - BAR requires adjustment of host resources for the device, which - absolutely can fail. Linux does not currently allow us to reserve - resources for the device independent of the current usage. - - The only writable field within the ReBAR capability is the BAR Size - register. The PCIe spec indicates that when written, the device - should immediately begin to operate with the provided BAR size. The - spec however also notes that software must only write values - corresponding to supported sizes as indicated in the capability and - control registers. Writing unsupported sizes produces undefined - results. Therefore, if the hypervisor were to virtualize the - capability and control registers such that the current size is the - only indicated available size, then a write of anything other than - the current size falls into the category of undefined behavior, - where we can essentially expose the modified ReBAR capability as - read-only. - - This may seem pointless, but users have reported that virtualizing - the capability in this way not only allows guest software to expose - related features as available (even if only cosmetic), but in some - scenarios can resolve guest driver issues. 
Additionally, no - regressions in behavior have been reported for this change. - - A caveat here is that the PCIe spec requires for compatibility that - devices report support for a size in the range of 1MB to 512GB, - therefore if the current BAR size falls outside that range we revert - to hiding the capability. - - Reviewed-by: Cédric Le Goater - Link: https://lore.kernel.org/r/20230505232308.2869912-1-alex.williamson@redhat.com - Signed-off-by: Alex Williamson - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 53 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 579b92a6ed..6cd3a98c39 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2069,6 +2069,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) - return 0; - } - -+static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) -+{ -+ uint32_t ctrl; -+ int i, nbar; -+ -+ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); -+ nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; -+ -+ for (i = 0; i < nbar; i++) { -+ uint32_t cap; -+ int size; -+ -+ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); -+ size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT; -+ -+ /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */ -+ cap = size <= 27 ? 1U << (size + 4) : 0; -+ -+ /* -+ * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one -+ * size in the range 1MB to 512GB. We intend to mask all sizes except -+ * the one currently enabled in the size field, therefore if it's -+ * outside the range, hide the whole capability as this virtualization -+ * trick won't work. If >512GB resizable BARs start to appear, we -+ * might need an opt-in or reservation scheme in the kernel. 
-+ */ -+ if (!(cap & PCI_REBAR_CAP_SIZES)) { -+ return -EINVAL; -+ } -+ -+ /* Hide all sizes reported in the ctrl reg per above requirement. */ -+ ctrl &= (PCI_REBAR_CTRL_BAR_SIZE | -+ PCI_REBAR_CTRL_NBAR_MASK | -+ PCI_REBAR_CTRL_BAR_IDX); -+ -+ /* -+ * The BAR size field is RW, however we've mangled the capability -+ * register such that we only report a single size, ie. the current -+ * BAR size. A write of an unsupported value is undefined, therefore -+ * the register field is essentially RO. -+ */ -+ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0); -+ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0); -+ } -+ -+ return 0; -+} -+ - static void vfio_add_ext_cap(VFIOPCIDevice *vdev) - { - PCIDevice *pdev = &vdev->pdev; -@@ -2142,9 +2190,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) - case 0: /* kernel masked capability */ - case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ - case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ -- case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */ - trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); - break; -+ case PCI_EXT_CAP_ID_REBAR: -+ if (!vfio_setup_rebar_ecap(vdev, next)) { -+ pcie_add_capability(pdev, cap_id, cap_ver, next, size); -+ } -+ break; - default: - pcie_add_capability(pdev, cap_id, cap_ver, next, size); - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch b/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch deleted file mode 100644 index 7b40e5e..0000000 --- a/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 3022cc31bca5a5441e285c971eaf72b7643b9be0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 03/37] vfio/pci: add support for VF token -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater 
-RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/28] ff24284ede2806e21f4f6709d8abd4c4029b7d5c (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 2dca1b37a760 -Author: Minwoo Im -Date: Mon Mar 20 16:35:22 2023 +0900 - - vfio/pci: add support for VF token - - VF token was introduced [1] to kernel vfio-pci along with SR-IOV - support [2]. This patch adds support VF token among PF and VF(s). To - passthu PCIe VF to a VM, kernel >= v5.7 needs this. - - It can be configured with UUID like: - - -device vfio-pci,host=DDDD:BB:DD:F,vf-token=,... - - [1] https://lore.kernel.org/linux-pci/158396393244.5601.10297430724964025753.stgit@gimli.home/ - [2] https://lore.kernel.org/linux-pci/158396044753.5601.14804870681174789709.stgit@gimli.home/ - - Cc: Alex Williamson - Signed-off-by: Minwoo Im - Reviewed-by: Klaus Jensen - Link: https://lore.kernel.org/r/20230320073522epcms2p48f682ecdb73e0ae1a4850ad0712fd780@epcms2p4 - Signed-off-by: Alex Williamson - -Conflicts: - - hw/vfio/pci.c - context changes in vfio_realize () due to redhat commit 267071d16b23 - ("vfio: cap number of devices that can be assigned") - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 13 ++++++++++++- - hw/vfio/pci.h | 1 + - 2 files changed, 13 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index a779053be3..579b92a6ed 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2859,6 +2859,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - int groupid; - int ret, i = 0; - bool is_mdev; -+ char uuid[UUID_FMT_LEN]; -+ char *name; - - if (device_limit && device_limit != vdev->assigned_device_limit) { - error_setg(errp, "Assigned device limit has been redefined. 
" -@@ -2960,7 +2962,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - goto error; - } - -- ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); -+ if (!qemu_uuid_is_null(&vdev->vf_token)) { -+ qemu_uuid_unparse(&vdev->vf_token, uuid); -+ name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); -+ } else { -+ name = vbasedev->name; -+ } -+ -+ ret = vfio_get_device(group, name, vbasedev, errp); -+ g_free(name); - if (ret) { - vfio_put_group(group); - goto error; -@@ -3292,6 +3302,7 @@ static void vfio_instance_init(Object *obj) - - static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), -+ DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), - DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), - DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, - vbasedev.pre_copy_dirty_page_tracking, -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 45235d38ba..10530743ad 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -137,6 +137,7 @@ struct VFIOPCIDevice { - VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ - void *igd_opregion; - PCIHostDeviceAddress host; -+ QemuUUID vf_token; - EventNotifier err_notifier; - EventNotifier req_notifier; - int (*resetfn)(struct VFIOPCIDevice *); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch b/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch new file mode 100644 index 0000000..06c2f0f --- /dev/null +++ b/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch @@ -0,0 +1,77 @@ +From ede579d6d5fe5be9235d6a218efdb237192aee0e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:11 +0800 +Subject: [PATCH 032/101] vfio/platform: Allow the selection of a given iommu + backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: 
IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [31/67] aba1dc16cada602edd7be1a28b0f57991131e6f7 (eauger1/centos-qemu-kvm) + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-platform device: + +If the user wants to use the legacy backend, it shall not +link the vfio-platform device with any iommufd object: + + -device vfio-platform,host=XXX + +This is called the legacy mode/backend. + +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-platform device options: + + -object iommufd,id=iommufd0 + -device vfio-platform,host=XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit a6c50e1c3f8d0eb77edaea392e61508bb3c516f8) +Signed-off-by: Eric Auger +--- + hw/vfio/platform.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 8e3d4ac458..98ae4bc655 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -15,11 +15,13 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include "qapi/error.h" + #include + #include + + #include "hw/vfio/vfio-platform.h" ++#include "sysemu/iommufd.h" + #include "migration/vmstate.h" + #include "qemu/error-report.h" + #include "qemu/lockable.h" +@@ -649,6 +651,10 @@ static Property vfio_platform_dev_properties[] = { + DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, + mmap_timeout, 1100), + DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + 
DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch b/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch new file mode 100644 index 0000000..f931524 --- /dev/null +++ b/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch @@ -0,0 +1,108 @@ +From 22664f4115d9b297ef4276e48f8ba0bc195ec99e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:12 +0800 +Subject: [PATCH 033/101] vfio/platform: Make vfio cdev pre-openable by passing + a file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [32/67] 069867dce64b826e92dc2051405a4ded5261981f (eauger1/centos-qemu-kvm) + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 3016e60f8f715d2058a48e4956be994482c5e218) +Signed-off-by: Eric Auger +--- + hw/vfio/platform.c | 32 ++++++++++++++++++++++++-------- + 1 file changed, 24 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 98ae4bc655..a97d9c6234 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -531,14 +531,13 @@ static VFIODeviceOps vfio_platform_ops = { + */ + static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) + { +- struct stat st; + int ret; + +- /* @sysfsdev takes precedence over @host */ +- if (vbasedev->sysfsdev) { ++ /* @fd takes precedence over @sysfsdev which takes precedence over @host */ ++ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { + g_free(vbasedev->name); + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); +- } else { ++ } else if (vbasedev->fd < 0) { + if (!vbasedev->name || strchr(vbasedev->name, '/')) { + error_setg(errp, "wrong host device name"); + return -EINVAL; +@@ -548,10 +547,9 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) + vbasedev->name); + } + +- if (stat(vbasedev->sysfsdev, &st) < 0) { +- error_setg_errno(errp, errno, +- "failed to get the sysfs host device file status"); +- return -errno; ++ ret = vfio_device_get_name(vbasedev, errp); ++ if (ret) { ++ return ret; + } + + ret = vfio_attach_device(vbasedev->name, vbasedev, +@@ -658,6 +656,20 @@ static Property vfio_platform_dev_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++static void vfio_platform_instance_init(Object *obj) ++{ ++ VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); ++ ++ vdev->vbasedev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp); ++} ++#endif ++ + static void 
vfio_platform_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -665,6 +677,9 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data) + + dc->realize = vfio_platform_realize; + device_class_set_props(dc, vfio_platform_dev_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd); ++#endif + dc->vmsd = &vfio_platform_vmstate; + dc->desc = "VFIO-based platform device assignment"; + sbc->connect_irq_notifier = vfio_start_irqfd_injection; +@@ -677,6 +692,7 @@ static const TypeInfo vfio_platform_dev_info = { + .name = TYPE_VFIO_PLATFORM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(VFIOPlatformDevice), ++ .instance_init = vfio_platform_instance_init, + .class_init = vfio_platform_class_init, + .class_size = sizeof(VFIOPlatformDeviceClass), + }; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch b/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch new file mode 100644 index 0000000..56283a6 --- /dev/null +++ b/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch @@ -0,0 +1,64 @@ +From 2417020283532030f424fe07dfeb7477e6489640 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:22 +0800 +Subject: [PATCH 043/101] vfio/platform: Move VFIODevice initializations in + vfio_platform_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [42/67] 53a459b6246d7d7bdc7a62ac92f02f1e775a54a6 (eauger1/centos-qemu-kvm) + +Some of the VFIODevice initializations is in vfio_platform_realize, +move all of them in vfio_platform_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit a0cf44c8d618578843a65ea7f6d3db8ce52185bc) +Signed-off-by: Eric Auger +--- + hw/vfio/platform.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index a97d9c6234..506eb8193f 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -581,10 +581,6 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) + VFIODevice *vbasedev = &vdev->vbasedev; + int i, ret; + +- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; +- vbasedev->dev = dev; +- vbasedev->ops = &vfio_platform_ops; +- + qemu_mutex_init(&vdev->intp_mutex); + + trace_vfio_platform_realize(vbasedev->sysfsdev ? +@@ -659,8 +655,12 @@ static Property vfio_platform_dev_properties[] = { + static void vfio_platform_instance_init(Object *obj) + { + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); ++ VFIODevice *vbasedev = &vdev->vbasedev; + +- vdev->vbasedev.fd = -1; ++ vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; ++ vbasedev->ops = &vfio_platform_ops; ++ vbasedev->dev = DEVICE(vdev); ++ vbasedev->fd = -1; + } + + #ifdef CONFIG_IOMMUFD +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch b/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch new file mode 100644 index 0000000..fb7e707 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch @@ -0,0 +1,129 @@ +From e75ec2aca351daabe597ca6322c1589885f30d7a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:16 +0100 +Subject: [PATCH 049/101] vfio/spapr: Extend VFIOIOMMUOps with a release + handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend 
backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [48/67] 1c4d22a6f69324805d050767fcf178d8566f2030 (eauger1/centos-qemu-kvm) + +This allows to abstract a bit more the sPAPR IOMMU support in the +legacy IOMMU backend. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit 001a013ea3f125d2ec0e709b5765754149d8d968) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 10 +++----- + hw/vfio/spapr.c | 35 +++++++++++++++------------ + include/hw/vfio/vfio-container-base.h | 1 + + 3 files changed, 24 insertions(+), 22 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index b22feb8ded..1e77a2929e 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -632,9 +632,8 @@ listener_release_exit: + QLIST_REMOVE(bcontainer, next); + vfio_kvm_device_del_group(group); + memory_listener_unregister(&bcontainer->listener); +- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || +- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { +- vfio_spapr_container_deinit(container); ++ if (bcontainer->ops->release) { ++ bcontainer->ops->release(bcontainer); + } + + enable_discards_exit: +@@ -667,9 +666,8 @@ static void vfio_disconnect_container(VFIOGroup *group) + */ + if (QLIST_EMPTY(&container->group_list)) { + memory_listener_unregister(&bcontainer->listener); +- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || +- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { +- vfio_spapr_container_deinit(container); ++ if (bcontainer->ops->release) { ++ bcontainer->ops->release(bcontainer); + } + } + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 5c6426e697..44617dfc6b 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -440,6 +440,24 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + } + } + ++static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) ++{ ++ VFIOContainer *container = 
container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); ++ VFIOHostDMAWindow *hostwin, *next; ++ ++ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { ++ memory_listener_unregister(&scontainer->prereg_listener); ++ } ++ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, ++ next) { ++ QLIST_REMOVE(hostwin, hostwin_next); ++ g_free(hostwin); ++ } ++} ++ + static VFIOIOMMUOps vfio_iommu_spapr_ops; + + static void setup_spapr_ops(VFIOContainerBase *bcontainer) +@@ -447,6 +465,7 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) + vfio_iommu_spapr_ops = *bcontainer->ops; + vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; + vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; ++ vfio_iommu_spapr_ops.release = vfio_spapr_container_release; + bcontainer->ops = &vfio_iommu_spapr_ops; + } + +@@ -527,19 +546,3 @@ listener_unregister_exit: + } + return ret; + } +- +-void vfio_spapr_container_deinit(VFIOContainer *container) +-{ +- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, +- container); +- VFIOHostDMAWindow *hostwin, *next; +- +- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { +- memory_listener_unregister(&scontainer->prereg_listener); +- } +- QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, +- next) { +- QLIST_REMOVE(hostwin, hostwin_next); +- g_free(hostwin); +- } +-} +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 2ae297ccda..5c9594b6c7 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -117,5 +117,6 @@ struct VFIOIOMMUOps { + Error **errp); + void (*del_window)(VFIOContainerBase *bcontainer, + MemoryRegionSection *section); ++ void (*release)(VFIOContainerBase *bcontainer); + }; + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.39.3 + 
diff --git a/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch b/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch new file mode 100644 index 0000000..f835acb --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch @@ -0,0 +1,150 @@ +From 645ed97633935712edcc2c56f252738b38f15e3a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:22 +0100 +Subject: [PATCH 055/101] vfio/spapr: Introduce a sPAPR VFIOIOMMU QOM interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [54/67] 2ceac3c07d71790dc3852fbbbd4084a7affb9373 (eauger1/centos-qemu-kvm) + +Move vfio_spapr_container_setup() to a VFIOIOMMUClass::setup handler +and convert the sPAPR VFIOIOMMUOps struct to a QOM interface. The +sPAPR QOM interface inherits from the legacy QOM interface because +because both have the same basic needs. The sPAPR interface is then +extended with the handlers specific to the sPAPR IOMMU. + +This allows reuse and provides better abstraction of the backends. It +will be useful to avoid compiling the sPAPR IOMMU backend on targets +not supporting it. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit f221f641a2fe69c2ca3857759551470664b0bec8) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 18 +++++-------- + hw/vfio/spapr.c | 39 ++++++++++++++++----------- + include/hw/vfio/vfio-container-base.h | 1 + + 3 files changed, 31 insertions(+), 27 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index c22bdd3216..688cf23bab 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -381,6 +381,10 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) + case VFIO_TYPE1_IOMMU: + klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); + break; ++ case VFIO_SPAPR_TCE_v2_IOMMU: ++ case VFIO_SPAPR_TCE_IOMMU: ++ klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR); ++ break; + default: + g_assert_not_reached(); + }; +@@ -623,19 +627,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + goto free_container_exit; + } + +- switch (container->iommu_type) { +- case VFIO_TYPE1v2_IOMMU: +- case VFIO_TYPE1_IOMMU: +- ret = vfio_legacy_setup(bcontainer, errp); +- break; +- case VFIO_SPAPR_TCE_v2_IOMMU: +- case VFIO_SPAPR_TCE_IOMMU: +- ret = vfio_spapr_container_init(container, errp); +- break; +- default: +- g_assert_not_reached(); +- } ++ assert(bcontainer->ops->setup); + ++ ret = bcontainer->ops->setup(bcontainer, errp); + if (ret) { + goto enable_discards_exit; + } +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 44617dfc6b..0d949bb728 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -458,20 +458,11 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) + } + } + +-static VFIOIOMMUOps vfio_iommu_spapr_ops; +- +-static void setup_spapr_ops(VFIOContainerBase *bcontainer) +-{ +- vfio_iommu_spapr_ops = *bcontainer->ops; +- vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; +- vfio_iommu_spapr_ops.del_window = 
vfio_spapr_container_del_section_window; +- vfio_iommu_spapr_ops.release = vfio_spapr_container_release; +- bcontainer->ops = &vfio_iommu_spapr_ops; +-} +- +-int vfio_spapr_container_init(VFIOContainer *container, Error **errp) ++static int vfio_spapr_container_setup(VFIOContainerBase *bcontainer, ++ Error **errp) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, + container); + struct vfio_iommu_spapr_tce_info info; +@@ -536,8 +527,6 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + 0x1000); + } + +- setup_spapr_ops(bcontainer); +- + return 0; + + listener_unregister_exit: +@@ -546,3 +535,23 @@ listener_unregister_exit: + } + return ret; + } ++ ++static void vfio_iommu_spapr_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->add_window = vfio_spapr_container_add_section_window; ++ vioc->del_window = vfio_spapr_container_del_section_window; ++ vioc->release = vfio_spapr_container_release; ++ vioc->setup = vfio_spapr_container_setup; ++}; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_SPAPR, ++ .parent = TYPE_VFIO_IOMMU_LEGACY, ++ .class_init = vfio_iommu_spapr_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index ce8b1fba88..9e21d7811f 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -95,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + #define TYPE_VFIO_IOMMU "vfio-iommu" + #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" ++#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.39.3 + diff --git 
a/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch b/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch new file mode 100644 index 0000000..f1ca4a2 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch @@ -0,0 +1,91 @@ +From ff0c13c22878eed0f3879c0805bef5b9f9d83e04 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:42 +0800 +Subject: [PATCH 017/101] vfio/spapr: Introduce spapr backend and target + interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [16/67] e35cda157a2a1afeded3305622c861abd07edb51 (eauger1/centos-qemu-kvm) + +Introduce an empty spapr backend which will hold spapr specific +content, currently only prereg_listener and hostwin_list. + +Also introduce two spapr specific callbacks add/del_window into +VFIOIOMMUOps. Instantiate a spapr ops with a helper setup_spapr_ops +and assign it to bcontainer->ops. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 9b7d38bf5a2c1054bfe6de08806954cdc45d8d98) +Signed-off-by: Eric Auger +--- + hw/vfio/spapr.c | 14 ++++++++++++++ + include/hw/vfio/vfio-container-base.h | 6 ++++++ + 2 files changed, 20 insertions(+) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 7a50975f25..e1a6b35563 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -24,6 +24,10 @@ + #include "qapi/error.h" + #include "trace.h" + ++typedef struct VFIOSpaprContainer { ++ VFIOContainer container; ++} VFIOSpaprContainer; ++ + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) + { + if (memory_region_is_iommu(section->mr)) { +@@ -421,6 +425,14 @@ void vfio_container_del_section_window(VFIOContainer *container, + } + } + ++static VFIOIOMMUOps vfio_iommu_spapr_ops; ++ ++static void setup_spapr_ops(VFIOContainerBase *bcontainer) ++{ ++ vfio_iommu_spapr_ops = *bcontainer->ops; ++ bcontainer->ops = &vfio_iommu_spapr_ops; ++} ++ + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { + VFIOContainerBase *bcontainer = &container->bcontainer; +@@ -486,6 +498,8 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + 0x1000); + } + ++ setup_spapr_ops(bcontainer); ++ + return 0; + + listener_unregister_exit: +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 9658ffb526..f62a14ac73 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -101,5 +101,11 @@ struct VFIOIOMMUOps { + int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); + int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); ++ /* SPAPR specific */ ++ int (*add_window)(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp); ++ void (*del_window)(VFIOContainerBase *bcontainer, 
++ MemoryRegionSection *section); + }; + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch b/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch new file mode 100644 index 0000000..93cb6b8 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch @@ -0,0 +1,188 @@ +From 3e9e7b57b15ac328f5d663b4e04df546d49f5fa6 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:45 +0800 +Subject: [PATCH 020/101] vfio/spapr: Move hostwin_list into spapr container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [19/67] 87cfeaa32ad32a260a89b2bb1866d59e20c0fe30 (eauger1/centos-qemu-kvm) + +No functional changes intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit dbb9d0c9691d145338686d3e0920da047f2ab3da) +Signed-off-by: Eric Auger +--- + hw/vfio/spapr.c | 36 +++++++++++++++++++---------------- + include/hw/vfio/vfio-common.h | 1 - + 2 files changed, 20 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 68c3dd6c75..5c6426e697 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -27,6 +27,7 @@ + typedef struct VFIOSpaprContainer { + VFIOContainer container; + MemoryListener prereg_listener; ++ QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + } VFIOSpaprContainer; + + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) +@@ -154,12 +155,12 @@ static const MemoryListener vfio_prereg_listener = { + .region_del = vfio_prereg_listener_region_del, + }; + +-static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, ++static void 
vfio_host_win_add(VFIOSpaprContainer *scontainer, hwaddr min_iova, + hwaddr max_iova, uint64_t iova_pgsizes) + { + VFIOHostDMAWindow *hostwin; + +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + min_iova, +@@ -173,15 +174,15 @@ static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, + hostwin->min_iova = min_iova; + hostwin->max_iova = max_iova; + hostwin->iova_pgsizes = iova_pgsizes; +- QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); ++ QLIST_INSERT_HEAD(&scontainer->hostwin_list, hostwin, hostwin_next); + } + +-static int vfio_host_win_del(VFIOContainer *container, ++static int vfio_host_win_del(VFIOSpaprContainer *scontainer, + hwaddr min_iova, hwaddr max_iova) + { + VFIOHostDMAWindow *hostwin; + +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); +@@ -192,7 +193,7 @@ static int vfio_host_win_del(VFIOContainer *container, + return -1; + } + +-static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, ++static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container, + hwaddr iova, hwaddr end) + { + VFIOHostDMAWindow *hostwin; +@@ -329,6 +330,8 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + { + VFIOContainer *container = container_of(bcontainer, VFIOContainer, + bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; + int ret; +@@ -344,7 +347,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + iova = section->offset_within_address_space; + end = iova + 
int128_get64(section->size) - 1; + +- if (!vfio_find_hostwin(container, iova, end)) { ++ if (!vfio_find_hostwin(scontainer, iova, end)) { + error_setg(errp, "Container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, + iova, end); +@@ -358,7 +361,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + } + + /* For now intersections are not allowed, we may relax this later */ +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + section->offset_within_address_space, +@@ -380,7 +383,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + return ret; + } + +- vfio_host_win_add(container, section->offset_within_address_space, ++ vfio_host_win_add(scontainer, section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, pgsize); + #ifdef CONFIG_KVM +@@ -419,6 +422,8 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + { + VFIOContainer *container = container_of(bcontainer, VFIOContainer, + bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; +@@ -426,7 +431,7 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + + vfio_spapr_remove_window(container, + section->offset_within_address_space); +- if (vfio_host_win_del(container, ++ if (vfio_host_win_del(scontainer, + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1) < 0) { +@@ -454,7 +459,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; + +- QLIST_INIT(&container->hostwin_list); ++ 
QLIST_INIT(&scontainer->hostwin_list); + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called +@@ -506,7 +511,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } else { + /* The default table uses 4K pages */ + bcontainer->pgsizes = 0x1000; +- vfio_host_win_add(container, info.dma32_window_start, ++ vfio_host_win_add(scontainer, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, + 0x1000); +@@ -525,15 +530,14 @@ listener_unregister_exit: + + void vfio_spapr_container_deinit(VFIOContainer *container) + { ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { +- VFIOSpaprContainer *scontainer = container_of(container, +- VFIOSpaprContainer, +- container); + memory_listener_unregister(&scontainer->prereg_listener); + } +- QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, ++ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index ed6148c058..24ecc0e7ee 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -79,7 +79,6 @@ typedef struct VFIOContainer { + VFIOContainerBase bcontainer; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + unsigned iommu_type; +- QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + } VFIOContainer; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch b/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch new file mode 100644 index 0000000..1db4b55 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch @@ -0,0 +1,120 @@ +From 17e6dad3e43e173147c0ca33f6f1f4f317a77d0b Mon Sep 17 
00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:44 +0800 +Subject: [PATCH 019/101] vfio/spapr: Move prereg_listener into spapr container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [18/67] dbea1b0b759e91b953271da92bba4ca6853bec82 (eauger1/centos-qemu-kvm) + +No functional changes intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 6ad359ec29af7f21dcb206c8edb26905a4925f80) +Signed-off-by: Eric Auger +--- + hw/vfio/spapr.c | 24 ++++++++++++++++-------- + include/hw/vfio/vfio-common.h | 1 - + 2 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 5be1911aad..68c3dd6c75 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -26,6 +26,7 @@ + + typedef struct VFIOSpaprContainer { + VFIOContainer container; ++ MemoryListener prereg_listener; + } VFIOSpaprContainer; + + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) +@@ -48,8 +49,9 @@ static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa) + static void vfio_prereg_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, +- prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, ++ prereg_listener); ++ VFIOContainer *container = &scontainer->container; + VFIOContainerBase *bcontainer = &container->bcontainer; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; +@@ -107,8 +109,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + static void vfio_prereg_listener_region_del(MemoryListener *listener, + MemoryRegionSection 
*section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, +- prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, ++ prereg_listener); ++ VFIOContainer *container = &scontainer->container; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; + int ret; +@@ -445,6 +448,8 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { + VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; +@@ -463,9 +468,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + return -errno; + } + } else { +- container->prereg_listener = vfio_prereg_listener; ++ scontainer->prereg_listener = vfio_prereg_listener; + +- memory_listener_register(&container->prereg_listener, ++ memory_listener_register(&scontainer->prereg_listener, + &address_space_memory); + if (bcontainer->error) { + ret = -1; +@@ -513,7 +518,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + + listener_unregister_exit: + if (v2) { +- memory_listener_unregister(&container->prereg_listener); ++ memory_listener_unregister(&scontainer->prereg_listener); + } + return ret; + } +@@ -523,7 +528,10 @@ void vfio_spapr_container_deinit(VFIOContainer *container) + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { +- memory_listener_unregister(&container->prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(container, ++ VFIOSpaprContainer, ++ container); ++ memory_listener_unregister(&scontainer->prereg_listener); + } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { +diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h +index 055f679363..ed6148c058 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -78,7 +78,6 @@ struct VFIOGroup; + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ +- MemoryListener prereg_listener; + unsigned iommu_type; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch b/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch new file mode 100644 index 0000000..7762804 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch @@ -0,0 +1,46 @@ +From 5d485eb1442a81b51688124ce30024e96490acbf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:24 +0100 +Subject: [PATCH 057/101] vfio/spapr: Only compile sPAPR IOMMU support when + needed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [56/67] 4dc0cbde470f877a8aac2bf6fab6923f2f919285 (eauger1/centos-qemu-kvm) + +sPAPR IOMMU support is only needed for pseries machines. Compile out +support when CONFIG_PSERIES is not set. This saves ~7K of text. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit 10164df6ed3d41cbf67105dcd954a663ef4cc3e9) +Signed-off-by: Eric Auger +--- + hw/vfio/meson.build | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build +index e5d98b6adc..bb98493b53 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -4,9 +4,9 @@ vfio_ss.add(files( + 'common.c', + 'container-base.c', + 'container.c', +- 'spapr.c', + 'migration.c', + )) ++vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) + vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( + 'iommufd.c', + )) +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch b/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch new file mode 100644 index 0000000..4d8db61 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch @@ -0,0 +1,184 @@ +From 3b7f044f15b4a9daf4ad7eda58777aba6dbe3fc0 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:43 +0800 +Subject: [PATCH 018/101] vfio/spapr: switch to spapr IOMMU BE + add/del_section_window +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [17/67] a0d9f1f2d4d2592f3d9fc2ee5b2c38236a986e38 (eauger1/centos-qemu-kvm) + +No functional change intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +(cherry picked from commit 233309e8e4c158af6c6b126d5ad021bae40a918a) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 8 ++------ + hw/vfio/container-base.c | 21 +++++++++++++++++++++ + hw/vfio/spapr.c | 19 ++++++++++++++----- + include/hw/vfio/vfio-common.h | 5 ----- + include/hw/vfio/vfio-container-base.h | 5 +++++ + 5 files changed, 42 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 483ba82089..572ae7c934 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -571,8 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, + { + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -595,7 +593,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + return; + } + +- if (vfio_container_add_section_window(container, section, &err)) { ++ if (vfio_container_add_section_window(bcontainer, section, &err)) { + goto fail; + } + +@@ -738,8 +736,6 @@ static void vfio_listener_region_del(MemoryListener *listener, + { + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -818,7 +814,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + + memory_region_unref(section->mr); + +- vfio_container_del_section_window(container, section); ++ vfio_container_del_section_window(bcontainer, section); + } + + typedef struct VFIODirtyRanges { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 0177f43741..71f7274973 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,6 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase 
*bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } + ++int vfio_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp) ++{ ++ if (!bcontainer->ops->add_window) { ++ return 0; ++ } ++ ++ return bcontainer->ops->add_window(bcontainer, section, errp); ++} ++ ++void vfio_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) ++{ ++ if (!bcontainer->ops->del_window) { ++ return; ++ } ++ ++ return bcontainer->ops->del_window(bcontainer, section); ++} ++ + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start) + { +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index e1a6b35563..5be1911aad 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -319,10 +319,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, + return 0; + } + +-int vfio_container_add_section_window(VFIOContainer *container, +- MemoryRegionSection *section, +- Error **errp) ++static int ++vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; + int ret; +@@ -407,9 +410,13 @@ int vfio_container_add_section_window(VFIOContainer *container, + return 0; + } + +-void vfio_container_del_section_window(VFIOContainer *container, +- MemoryRegionSection *section) ++static void ++vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; + } +@@ -430,6 +437,8 @@ static VFIOIOMMUOps vfio_iommu_spapr_ops; + static void setup_spapr_ops(VFIOContainerBase *bcontainer) + { + vfio_iommu_spapr_ops = *bcontainer->ops; ++ 
vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; ++ vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; + bcontainer->ops = &vfio_iommu_spapr_ops; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b9e5a0e64b..055f679363 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -169,11 +169,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + + /* SPAPR specific */ +-int vfio_container_add_section_window(VFIOContainer *container, +- MemoryRegionSection *section, +- Error **errp); +-void vfio_container_del_section_window(VFIOContainer *container, +- MemoryRegionSection *section); + int vfio_spapr_container_init(VFIOContainer *container, Error **errp); + void vfio_spapr_container_deinit(VFIOContainer *container); + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index f62a14ac73..4b6f017c6f 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -75,6 +75,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, + int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); ++int vfio_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp); ++void vfio_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section); + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start); + int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, +-- +2.39.3 + diff --git a/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch b/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch deleted file mode 100644 index 3282c24..0000000 --- a/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch +++ 
/dev/null @@ -1,138 +0,0 @@ -From ac54f5f746782da89ab674733af5622e524b58eb Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 2 Jun 2023 18:27:35 +0200 -Subject: [PATCH 4/6] vhost: fix vhost_dev_enable_notifiers() error case -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 176: vhost: fix vhost_dev_enable_notifiers() error case -RH-Jira: RHEL-330 -RH-Acked-by: MST -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Jason Wang -RH-Commit: [1/1] fd30d7501be59f7e5b9d6fc5ed84efcc4037d08e (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-330 - -in vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true) -fails, we call vhost_dev_disable_notifiers() that executes -virtio_bus_set_host_notifier(false) on all queues, even on queues that -have failed to be initialized. - -This triggers a core dump in memory_region_del_eventfd(): - - virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24) - vhost VQ 1 notifier binding failed: 24 - .../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. - -Fix the problem by providing to vhost_dev_disable_notifiers() the -number of queues to disable. - -Fixes: 8771589b6f81 ("vhost: simplify vhost_dev_enable_notifiers") -Cc: longpeng2@huawei.com -Signed-off-by: Laurent Vivier -Message-Id: <20230602162735.3670785-1-lvivier@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit 92099aa4e9a3bb6856c290afaf41c76f9e3dd9fd) ---- - hw/virtio/vhost.c | 65 ++++++++++++++++++++++++++--------------------- - 1 file changed, 36 insertions(+), 29 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index a266396576..ae0a033e60 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -1545,6 +1545,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) - memset(hdev, 0, sizeof(struct vhost_dev)); - } - -+static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, -+ VirtIODevice *vdev, -+ unsigned int nvqs) -+{ -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -+ int i, r; -+ -+ /* -+ * Batch all the host notifiers in a single transaction to avoid -+ * quadratic time complexity in address_space_update_ioeventfds(). -+ */ -+ memory_region_transaction_begin(); -+ -+ for (i = 0; i < nvqs; ++i) { -+ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, -+ false); -+ if (r < 0) { -+ error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); -+ } -+ assert(r >= 0); -+ } -+ -+ /* -+ * The transaction expects the ioeventfds to be open when it -+ * commits. Do it now, before the cleanup loop. -+ */ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; ++i) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); -+ } -+ virtio_device_release_ioeventfd(vdev); -+} -+ - /* Stop processing guest IO notifications in qemu. - * Start processing them in vhost in kernel. 
- */ -@@ -1574,7 +1608,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - if (r < 0) { - error_report("vhost VQ %d notifier binding failed: %d", i, -r); - memory_region_transaction_commit(); -- vhost_dev_disable_notifiers(hdev, vdev); -+ vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); - return r; - } - } -@@ -1591,34 +1625,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - */ - void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - { -- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -- int i, r; -- -- /* -- * Batch all the host notifiers in a single transaction to avoid -- * quadratic time complexity in address_space_update_ioeventfds(). -- */ -- memory_region_transaction_begin(); -- -- for (i = 0; i < hdev->nvqs; ++i) { -- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, -- false); -- if (r < 0) { -- error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); -- } -- assert (r >= 0); -- } -- -- /* -- * The transaction expects the ioeventfds to be open when it -- * commits. Do it now, before the cleanup loop. -- */ -- memory_region_transaction_commit(); -- -- for (i = 0; i < hdev->nvqs; ++i) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); -- } -- virtio_device_release_ioeventfd(vdev); -+ vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); - } - - /* Test and clear event pending status. 
--- -2.39.3 - diff --git a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch deleted file mode 100644 index fd29eb7..0000000 --- a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 4e30ca551fb3740a428017a0debf0a6aab976639 Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Mon, 19 Jun 2023 12:22:09 +0530 -Subject: [PATCH 6/6] vhost-vdpa: do not cleanup the vdpa/vhost-net structures - if peer nic is present - -RH-Author: Ani Sinha -RH-MergeRequest: 174: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present -RH-Bugzilla: 2128929 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] c70d4e5fd93256326d318e0b507db6b9eb93ad86 (anisinha/centos-qemu-kvm) - -When a peer nic is still attached to the vdpa backend, it is too early to free -up the vhost-net and vdpa structures. If these structures are freed here, then -QEMU crashes when the guest is being shut down. The following call chain -would result in an assertion failure since the pointer returned from -vhost_vdpa_get_vhost_net() would be NULL: - -do_vm_stop() -> vm_state_notify() -> virtio_set_status() -> -virtio_net_vhost_status() -> get_vhost_net(). - -Therefore, we defer freeing up the structures until at guest shutdown -time when qemu_cleanup() calls net_cleanup() which then calls -qemu_del_net_client() which would eventually call vhost_vdpa_cleanup() -again to free up the structures. This time, the loop in net_cleanup() -ensures that vhost_vdpa_cleanup() will be called one last time when -all the peer nics are detached and freed. - -All unit tests pass with this change. 
- -CC: imammedo@redhat.com -CC: jusual@redhat.com -CC: mst@redhat.com -Fixes: CVE-2023-3301 -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 -Signed-off-by: Ani Sinha -Message-Id: <20230619065209.442185-1-anisinha@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit a0d7215e339b61c7d7a7b3fcf754954d80d93eb8) ---- - net/vhost-vdpa.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 99904a0da7..8c8900f0f4 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -184,6 +184,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - -+ /* -+ * If a peer NIC is attached, do not cleanup anything. -+ * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup() -+ * when the guest is shutting down. -+ */ -+ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { -+ return; -+ } - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->status); - if (s->vhost_net) { --- -2.39.3 - diff --git a/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch b/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch deleted file mode 100644 index 3711949..0000000 --- a/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 3b51a7b84ea21360c6d551284aecb8b6f371e888 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 4 Jul 2023 09:19:31 +0200 -Subject: [PATCH 9/9] vhost-vdpa: mute unaligned memory error report -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 193: vhost-vdpa: mute unaligned memory error report -RH-Bugzilla: 2141965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/1] 60f5385d41269ce9310e1e8e0a2f1106e3a16ada (lvivier/qemu-kvm-centos) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2141965 - -With 
TPM CRM device, vhost-vdpa reports an error when it tries -to register a listener for a non aligned memory region: - - qemu-system-x86_64: vhost_vdpa_listener_region_add received unaligned region - qemu-system-x86_64: vhost_vdpa_listener_region_del received unaligned region - -This error can be confusing for the user whereas we only need to skip -the region (as it's already done after the error_report()) - -Rather than introducing a special case for TPM CRB memory section -to not display the message in this case, simply replace the -error_report() by a trace function (with more information, like the -memory region name). - -Signed-off-by: Laurent Vivier -Message-Id: <20230704071931.575888-2-lvivier@redhat.com> -Reviewed-by: David Hildenbrand -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 77812aa7b1fdf8f547c35a7f9a4eb1cbf3a073db) ---- - hw/virtio/trace-events | 2 ++ - hw/virtio/vhost-vdpa.c | 8 ++++++-- - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 68b752e304..300dec8d3e 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -34,7 +34,9 @@ vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_ - vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 - vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 -+vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 - 
vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" -+vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 - vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 - vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 - vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index bc6bad23d5..c04f14420d 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -202,7 +202,9 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { -- error_report("%s received unaligned region", __func__); -+ trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, -+ section->offset_within_address_space & ~TARGET_PAGE_MASK, -+ section->offset_within_region & ~TARGET_PAGE_MASK); - return; - } - -@@ -281,7 +283,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { -- error_report("%s received unaligned region", __func__); -+ trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, -+ section->offset_within_address_space & ~TARGET_PAGE_MASK, -+ section->offset_within_region & ~TARGET_PAGE_MASK); - return; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch b/SOURCES/kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch deleted file mode 100644 index 20a99e5..0000000 --- 
a/SOURCES/kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch +++ /dev/null @@ -1,151 +0,0 @@ -From b99a7e5e5631af3ee806fd0d78d7c7056eb559b5 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 5 Sep 2023 16:50:02 +0200 -Subject: [PATCH] virtio: Drop out of coroutine context in virtio_load() - -RH-Author: Kevin Wolf -RH-MergeRequest: 319: virtio: Drop out of coroutine context in virtio_load() [9.3.0.z 0day] -RH-Jira: RHEL-4453 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/1] 6ae1d5a464e27bfaf892e093febcaf211a1ff5ec - -virtio_load() as a whole should run in coroutine context because it -reads from the migration stream and we don't want this to block. - -However, it calls virtio_set_features_nocheck() and devices don't -expect their .set_features callback to run in a coroutine and therefore -call functions that may not be called in coroutine context. To fix this, -drop out of coroutine context for calling virtio_set_features_nocheck(). - -Without this fix, the following crash was reported: - - #0 __pthread_kill_implementation (threadid=, signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44 - #1 0x00007efc738c05d3 in __pthread_kill_internal (signo=6, threadid=) at pthread_kill.c:78 - #2 0x00007efc73873d26 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 - #3 0x00007efc738477f3 in __GI_abort () at abort.c:79 - #4 0x00007efc7384771b in __assert_fail_base (fmt=0x7efc739dbcb8 "", assertion=assertion@entry=0x560aebfbf5cf "!qemu_in_coroutine()", - file=file@entry=0x560aebfcd2d4 "../block/graph-lock.c", line=line@entry=275, function=function@entry=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at assert.c:92 - #5 0x00007efc7386ccc6 in __assert_fail (assertion=0x560aebfbf5cf "!qemu_in_coroutine()", file=0x560aebfcd2d4 "../block/graph-lock.c", line=275, - function=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at assert.c:101 - #6 0x0000560aebcd8dd6 in bdrv_register_buf () - #7 
0x0000560aeb97ed97 in ram_block_added.llvm () - #8 0x0000560aebb8303f in ram_block_add.llvm () - #9 0x0000560aebb834fa in qemu_ram_alloc_internal.llvm () - #10 0x0000560aebb2ac98 in vfio_region_mmap () - #11 0x0000560aebb3ea0f in vfio_bars_register () - #12 0x0000560aebb3c628 in vfio_realize () - #13 0x0000560aeb90f0c2 in pci_qdev_realize () - #14 0x0000560aebc40305 in device_set_realized () - #15 0x0000560aebc48e07 in property_set_bool.llvm () - #16 0x0000560aebc46582 in object_property_set () - #17 0x0000560aebc4cd58 in object_property_set_qobject () - #18 0x0000560aebc46ba7 in object_property_set_bool () - #19 0x0000560aeb98b3ca in qdev_device_add_from_qdict () - #20 0x0000560aebb1fbaf in virtio_net_set_features () - #21 0x0000560aebb46b51 in virtio_set_features_nocheck () - #22 0x0000560aebb47107 in virtio_load () - #23 0x0000560aeb9ae7ce in vmstate_load_state () - #24 0x0000560aeb9d2ee9 in qemu_loadvm_state_main () - #25 0x0000560aeb9d45e1 in qemu_loadvm_state () - #26 0x0000560aeb9bc32c in process_incoming_migration_co.llvm () - #27 0x0000560aebeace56 in coroutine_trampoline.llvm () - -Cc: qemu-stable@nongnu.org -Buglink: https://issues.redhat.com/browse/RHEL-832 -Signed-off-by: Kevin Wolf -Message-ID: <20230905145002.46391-3-kwolf@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Kevin Wolf -(cherry picked from commit 92e2e6a867334a990f8d29f07ca34e3162fdd6ec) -Signed-off-by: Kevin Wolf ---- - hw/virtio/virtio.c | 45 ++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 40 insertions(+), 5 deletions(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 98c4819fcc..0010a9a5f1 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2825,8 +2825,9 @@ static int virtio_device_put(QEMUFile *f, void *opaque, size_t size, - } - - /* A wrapper for use as a VMState .get function */ --static int virtio_device_get(QEMUFile *f, void *opaque, size_t size, -- const VMStateField *field) -+static int coroutine_mixed_fn 
-+virtio_device_get(QEMUFile *f, void *opaque, size_t size, -+ const VMStateField *field) - { - VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev)); -@@ -2853,6 +2854,39 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) - return bad ? -1 : 0; - } - -+typedef struct VirtioSetFeaturesNocheckData { -+ Coroutine *co; -+ VirtIODevice *vdev; -+ uint64_t val; -+ int ret; -+} VirtioSetFeaturesNocheckData; -+ -+static void virtio_set_features_nocheck_bh(void *opaque) -+{ -+ VirtioSetFeaturesNocheckData *data = opaque; -+ -+ data->ret = virtio_set_features_nocheck(data->vdev, data->val); -+ aio_co_wake(data->co); -+} -+ -+static int coroutine_mixed_fn -+virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val) -+{ -+ if (qemu_in_coroutine()) { -+ VirtioSetFeaturesNocheckData data = { -+ .co = qemu_coroutine_self(), -+ .vdev = vdev, -+ .val = val, -+ }; -+ aio_bh_schedule_oneshot(qemu_get_current_aio_context(), -+ virtio_set_features_nocheck_bh, &data); -+ qemu_coroutine_yield(); -+ return data.ret; -+ } else { -+ return virtio_set_features_nocheck(vdev, val); -+ } -+} -+ - int virtio_set_features(VirtIODevice *vdev, uint64_t val) - { - int ret; -@@ -2906,7 +2940,8 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, - return config_size; - } - --int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) -+int coroutine_mixed_fn -+virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) - { - int i, ret; - int32_t config_len; -@@ -3023,14 +3058,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) - * host_features. - */ - uint64_t features64 = vdev->guest_features; -- if (virtio_set_features_nocheck(vdev, features64) < 0) { -+ if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) { - error_report("Features 0x%" PRIx64 " unsupported. 
" - "Allowed features: 0x%" PRIx64, - features64, vdev->host_features); - return -1; - } - } else { -- if (virtio_set_features_nocheck(vdev, features) < 0) { -+ if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) { - error_report("Features 0x%x unsupported. " - "Allowed features: 0x%" PRIx64, - features, vdev->host_features); --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch b/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch new file mode 100644 index 0000000..ef770fd --- /dev/null +++ b/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch @@ -0,0 +1,139 @@ +From 2a758da4e1433564998def68447008908c96e113 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 16:31:57 +0100 +Subject: [PATCH 2/6] virtio: Re-enable notifications after drain + +RH-Author: Hanna Czenczek +RH-MergeRequest: 223: virtio: Re-enable notifications after drain +RH-Jira: RHEL-3934 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/3] e3be798e6259a378fc03f4364ecaeb875b01f64c (hreitz/qemu-kvm-c-9-s) + +During drain, we do not care about virtqueue notifications, which is why +we remove the handlers on it. When removing those handlers, whether vq +notifications are enabled or not depends on whether we were in polling +mode or not; if not, they are enabled (by default); if so, they have +been disabled by the io_poll_start callback. + +Because we do not care about those notifications after removing the +handlers, this is fine. However, we have to explicitly ensure they are +enabled when re-attaching the handlers, so we will resume receiving +notifications. We do this in virtio_queue_aio_attach_host_notifier*(). +If such a function is called while we are in a polling section, +attaching the notifiers will then invoke the io_poll_start callback, +re-disabling notifications. 
+ +Because we will always miss virtqueue updates in the drained section, we +also need to poll the virtqueue once after attaching the notifiers. + +Buglink: https://issues.redhat.com/browse/RHEL-3934 +Signed-off-by: Hanna Czenczek +Message-ID: <20240202153158.788922-3-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 5bdbaebcce18fe6a627cafad2043ec08f3de5744) +--- + hw/virtio/virtio.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + include/block/aio.h | 7 ++++++- + 2 files changed, 48 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3a160f86ed..356d690cc9 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3556,6 +3556,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) + + void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + { ++ /* ++ * virtio_queue_aio_detach_host_notifier() can leave notifications disabled. ++ * Re-enable them. (And if detach has not been used before, notifications ++ * being enabled is still the default state while a notifier is attached; ++ * see virtio_queue_host_notifier_aio_poll_end(), which will always leave ++ * notifications enabled once the polling section is left.) ++ */ ++ if (!virtio_queue_get_notification(vq)) { ++ virtio_queue_set_notification(vq, 1); ++ } ++ + aio_set_event_notifier(ctx, &vq->host_notifier, + virtio_queue_host_notifier_read, + virtio_queue_host_notifier_aio_poll, +@@ -3563,6 +3574,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + aio_set_event_notifier_poll(ctx, &vq->host_notifier, + virtio_queue_host_notifier_aio_poll_begin, + virtio_queue_host_notifier_aio_poll_end); ++ ++ /* ++ * We will have ignored notifications about new requests from the guest ++ * while no notifiers were attached, so "kick" the virt queue to process ++ * those requests now. 
++ */ ++ event_notifier_set(&vq->host_notifier); + } + + /* +@@ -3573,14 +3591,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + */ + void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) + { ++ /* See virtio_queue_aio_attach_host_notifier() */ ++ if (!virtio_queue_get_notification(vq)) { ++ virtio_queue_set_notification(vq, 1); ++ } ++ + aio_set_event_notifier(ctx, &vq->host_notifier, + virtio_queue_host_notifier_read, + NULL, NULL); ++ ++ /* ++ * See virtio_queue_aio_attach_host_notifier(). ++ * Note that this may be unnecessary for the type of virtqueues this ++ * function is used for. Still, it will not hurt to have a quick look into ++ * whether we can/should process any of the virtqueue elements. ++ */ ++ event_notifier_set(&vq->host_notifier); + } + + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) + { + aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL); ++ ++ /* ++ * aio_set_event_notifier_poll() does not guarantee whether io_poll_end() ++ * will run after io_poll_begin(), so by removing the notifier, we do not ++ * know whether virtio_queue_host_notifier_aio_poll_end() has run after a ++ * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether ++ * notifications are enabled or disabled. It does not really matter anyway; ++ * we just removed the notifier, so we do not care about notifications until ++ * we potentially re-attach it. The attach_host_notifier functions will ++ * ensure that notifications are enabled again when they are needed. 
++ */ + } + + void virtio_queue_host_notifier_read(EventNotifier *n) +diff --git a/include/block/aio.h b/include/block/aio.h +index af05512a7d..261c77fd9a 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -480,9 +480,14 @@ void aio_set_event_notifier(AioContext *ctx, + AioPollFn *io_poll, + EventNotifierHandler *io_poll_ready); + +-/* Set polling begin/end callbacks for an event notifier that has already been ++/* ++ * Set polling begin/end callbacks for an event notifier that has already been + * registered with aio_set_event_notifier. Do nothing if the event notifier is + * not registered. ++ * ++ * Note that if the io_poll_end() callback (or the entire notifier) is removed ++ * during polling, it will not be called, so an io_poll_begin() is not ++ * necessarily always followed by an io_poll_end(). + */ + void aio_set_event_notifier_poll(AioContext *ctx, + EventNotifier *notifier, +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch b/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch new file mode 100644 index 0000000..0565357 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch @@ -0,0 +1,47 @@ +From 376df80fbba5a9bb0ec43cad083cde9de59128d7 Mon Sep 17 00:00:00 2001 +From: Stefan Weil via +Date: Sun, 24 Dec 2023 12:43:14 +0100 +Subject: [PATCH 10/22] virtio-blk: Fix potential nullpointer read access in + virtio_blk_data_plane_destroy + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [6/17] 460005fc7719b2e1dd577dfe75d18537ab2b8d06 (stefanha/centos-stream-qemu-kvm) + +Fixes: CID 1532828 +Fixes: b6948ab01d ("virtio-blk: add iothread-vq-mapping parameter") +Signed-off-by: Stefan Weil +Signed-off-by: Michael Tokarev +(cherry picked from commit d819fc9516a4ec71e37a6c9edfcd285b7f98c2dc) 
+Signed-off-by: Stefan Hajnoczi +--- + hw/block/dataplane/virtio-blk.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 6debd4401e..97a302cf49 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -152,7 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) + { + VirtIOBlock *vblk; +- VirtIOBlkConf *conf = s->conf; ++ VirtIOBlkConf *conf; + + if (!s) { + return; +@@ -160,6 +160,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) + + vblk = VIRTIO_BLK(s->vdev); + assert(!vblk->dataplane_started); ++ conf = s->conf; + + if (conf->iothread_vq_mapping_list) { + IOThreadVirtQueueMappingList *node; +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch b/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch new file mode 100644 index 0000000..1a3771e --- /dev/null +++ b/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch @@ -0,0 +1,75 @@ +From 094941b2c3e66e078d93718933eb07e800a7dd60 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 16:31:58 +0100 +Subject: [PATCH 3/6] virtio-blk: Use ioeventfd_attach in start_ioeventfd + +RH-Author: Hanna Czenczek +RH-MergeRequest: 223: virtio: Re-enable notifications after drain +RH-Jira: RHEL-3934 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/3] 96d6760d1b7b12df695b6825b15a2a3b8a79a74c (hreitz/qemu-kvm-c-9-s) + +Commit d3f6f294aeadd5f88caf0155e4360808c95b3146 ("virtio-blk: always set +ioeventfd during startup") has made virtio_blk_start_ioeventfd() always +kick the virtqueue (set the ioeventfd), regardless of whether the BB is +drained. 
That is no longer necessary, because attaching the host +notifier will now set the ioeventfd, too; this happens either +immediately right here in virtio_blk_start_ioeventfd(), or later when +the drain ends, in virtio_blk_ioeventfd_attach(). + +With event_notifier_set() removed, the code becomes the same as the one +in virtio_blk_ioeventfd_attach(), so we can reuse that function. + +Signed-off-by: Hanna Czenczek +Message-ID: <20240202153158.788922-4-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 52bff01f64eec017ffb0d5903a0ee1d67ca7a548) +--- + hw/block/virtio-blk.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 0b9100b746..7fdeaf2d12 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -37,6 +37,8 @@ + #include "hw/virtio/virtio-blk-common.h" + #include "qemu/coroutine.h" + ++static void virtio_blk_ioeventfd_attach(VirtIOBlock *s); ++ + static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, + VirtIOBlockReq *req) + { +@@ -1808,17 +1810,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + s->ioeventfd_started = true; + smp_wmb(); /* paired with aio_notify_accept() on the read side */ + +- /* Get this show started by hooking up our callbacks */ +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- AioContext *ctx = s->vq_aio_context[i]; +- +- /* Kick right away to begin processing requests already in vring */ +- event_notifier_set(virtio_queue_get_host_notifier(vq)); +- +- if (!blk_in_drain(s->conf.conf.blk)) { +- virtio_queue_aio_attach_host_notifier(vq, ctx); +- } ++ /* ++ * Get this show started by hooking up our callbacks. If drained now, ++ * virtio_blk_drained_end() will do this later. ++ * Attaching the notifier also kicks the virtqueues, processing any requests ++ * they may already have. 
++ */ ++ if (!blk_in_drain(s->conf.conf.blk)) { ++ virtio_blk_ioeventfd_attach(s); + } + return 0; + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch b/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch new file mode 100644 index 0000000..65a96a0 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch @@ -0,0 +1,464 @@ +From 733fc13f65286c849ad6618be89df450f8bc5f7e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 20 Dec 2023 08:47:55 -0500 +Subject: [PATCH 09/22] virtio-blk: add iothread-vq-mapping parameter + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [5/17] c371fe62376c4eb54da88272a5966cec28404224 (stefanha/centos-stream-qemu-kvm) + +Add the iothread-vq-mapping parameter to assign virtqueues to IOThreads. +Store the vq:AioContext mapping in the new struct +VirtIOBlockDataPlane->vq_aio_context[] field and refactor the code to +use the per-vq AioContext instead of the BlockDriverState's AioContext. + +Reimplement --device virtio-blk-pci,iothread= and non-IOThread mode by +assigning all virtqueues to the IOThread and main loop's AioContext in +vq_aio_context[], respectively. + +The comment in struct VirtIOBlockDataPlane about EventNotifiers is +stale. Remove it. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231220134755.814917-5-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b6948ab01df068bef591868c22d1f873d2d05cde) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/dataplane/virtio-blk.c | 155 ++++++++++++++++++++++++-------- + hw/block/dataplane/virtio-blk.h | 3 + + hw/block/virtio-blk.c | 92 ++++++++++++++++--- + include/hw/virtio/virtio-blk.h | 2 + + 4 files changed, 202 insertions(+), 50 deletions(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 7bbbd981ad..6debd4401e 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -32,13 +32,11 @@ struct VirtIOBlockDataPlane { + VirtIOBlkConf *conf; + VirtIODevice *vdev; + +- /* Note that these EventNotifiers are assigned by value. This is +- * fine as long as you do not call event_notifier_cleanup on them +- * (because you don't own the file descriptor or handle; you just +- * use it). ++ /* ++ * The AioContext for each virtqueue. The BlockDriverState will use the ++ * first element as its AioContext. 
+ */ +- IOThread *iothread; +- AioContext *ctx; ++ AioContext **vq_aio_context; + }; + + /* Raise an interrupt to signal guest, if necessary */ +@@ -47,6 +45,45 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) + virtio_notify_irqfd(s->vdev, vq); + } + ++/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ ++static void ++apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, ++ AioContext **vq_aio_context, uint16_t num_queues) ++{ ++ IOThreadVirtQueueMappingList *node; ++ size_t num_iothreads = 0; ++ size_t cur_iothread = 0; ++ ++ for (node = iothread_vq_mapping_list; node; node = node->next) { ++ num_iothreads++; ++ } ++ ++ for (node = iothread_vq_mapping_list; node; node = node->next) { ++ IOThread *iothread = iothread_by_id(node->value->iothread); ++ AioContext *ctx = iothread_get_aio_context(iothread); ++ ++ /* Released in virtio_blk_data_plane_destroy() */ ++ object_ref(OBJECT(iothread)); ++ ++ if (node->value->vqs) { ++ uint16List *vq; ++ ++ /* Explicit vq:IOThread assignment */ ++ for (vq = node->value->vqs; vq; vq = vq->next) { ++ vq_aio_context[vq->value] = ctx; ++ } ++ } else { ++ /* Round-robin vq:IOThread assignment */ ++ for (unsigned i = cur_iothread; i < num_queues; ++ i += num_iothreads) { ++ vq_aio_context[i] = ctx; ++ } ++ } ++ ++ cur_iothread++; ++ } ++} ++ + /* Context: QEMU global mutex held */ + bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + VirtIOBlockDataPlane **dataplane, +@@ -58,7 +95,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + + *dataplane = NULL; + +- if (conf->iothread) { ++ if (conf->iothread || conf->iothread_vq_mapping_list) { + if (!k->set_guest_notifiers || !k->ioeventfd_assign) { + error_setg(errp, + "device is incompatible with iothread " +@@ -86,13 +123,24 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + s = g_new0(VirtIOBlockDataPlane, 1); + s->vdev = vdev; + 
s->conf = conf; ++ s->vq_aio_context = g_new(AioContext *, conf->num_queues); ++ ++ if (conf->iothread_vq_mapping_list) { ++ apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, ++ conf->num_queues); ++ } else if (conf->iothread) { ++ AioContext *ctx = iothread_get_aio_context(conf->iothread); ++ for (unsigned i = 0; i < conf->num_queues; i++) { ++ s->vq_aio_context[i] = ctx; ++ } + +- if (conf->iothread) { +- s->iothread = conf->iothread; +- object_ref(OBJECT(s->iothread)); +- s->ctx = iothread_get_aio_context(s->iothread); ++ /* Released in virtio_blk_data_plane_destroy() */ ++ object_ref(OBJECT(conf->iothread)); + } else { +- s->ctx = qemu_get_aio_context(); ++ AioContext *ctx = qemu_get_aio_context(); ++ for (unsigned i = 0; i < conf->num_queues; i++) { ++ s->vq_aio_context[i] = ctx; ++ } + } + + *dataplane = s; +@@ -104,6 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) + { + VirtIOBlock *vblk; ++ VirtIOBlkConf *conf = s->conf; + + if (!s) { + return; +@@ -111,9 +160,21 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) + + vblk = VIRTIO_BLK(s->vdev); + assert(!vblk->dataplane_started); +- if (s->iothread) { +- object_unref(OBJECT(s->iothread)); ++ ++ if (conf->iothread_vq_mapping_list) { ++ IOThreadVirtQueueMappingList *node; ++ ++ for (node = conf->iothread_vq_mapping_list; node; node = node->next) { ++ IOThread *iothread = iothread_by_id(node->value->iothread); ++ object_unref(OBJECT(iothread)); ++ } ++ } ++ ++ if (conf->iothread) { ++ object_unref(OBJECT(conf->iothread)); + } ++ ++ g_free(s->vq_aio_context); + g_free(s); + } + +@@ -177,19 +238,13 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + + trace_virtio_blk_data_plane_start(s); + +- r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); ++ r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], ++ &local_err); + if (r < 0) { + 
error_report_err(local_err); + goto fail_aio_context; + } + +- /* Kick right away to begin processing requests already in vring */ +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(s->vdev, i); +- +- event_notifier_set(virtio_queue_get_host_notifier(vq)); +- } +- + /* + * These fields must be visible to the IOThread when it processes the + * virtqueue, otherwise it will think dataplane has not started yet. +@@ -206,8 +261,12 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + if (!blk_in_drain(s->conf->conf.blk)) { + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; + +- virtio_queue_aio_attach_host_notifier(vq, s->ctx); ++ /* Kick right away to begin processing requests already in vring */ ++ event_notifier_set(virtio_queue_get_host_notifier(vq)); ++ ++ virtio_queue_aio_attach_host_notifier(vq, ctx); + } + } + return 0; +@@ -236,23 +295,18 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + * + * Context: BH in IOThread + */ +-static void virtio_blk_data_plane_stop_bh(void *opaque) ++static void virtio_blk_data_plane_stop_vq_bh(void *opaque) + { +- VirtIOBlockDataPlane *s = opaque; +- unsigned i; +- +- for (i = 0; i < s->conf->num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(s->vdev, i); +- EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); ++ VirtQueue *vq = opaque; ++ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); + +- virtio_queue_aio_detach_host_notifier(vq, s->ctx); ++ virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); + +- /* +- * Test and clear notifier after disabling event, in case poll callback +- * didn't have time to run. +- */ +- virtio_queue_host_notifier_read(host_notifier); +- } ++ /* ++ * Test and clear notifier after disabling event, in case poll callback ++ * didn't have time to run. 
++ */ ++ virtio_queue_host_notifier_read(host_notifier); + } + + /* Context: QEMU global mutex held */ +@@ -279,7 +333,12 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + trace_virtio_blk_data_plane_stop(s); + + if (!blk_in_drain(s->conf->conf.blk)) { +- aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); ++ for (i = 0; i < nvqs; i++) { ++ VirtQueue *vq = virtio_get_queue(s->vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; ++ ++ aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); ++ } + } + + /* +@@ -322,3 +381,23 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + + s->stopping = false; + } ++ ++void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); ++ ++ for (uint16_t i = 0; i < s->conf->num_queues; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); ++ } ++} ++ ++void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); ++ ++ for (uint16_t i = 0; i < s->conf->num_queues; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); ++ } ++} +diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h +index 5e18bb99ae..1a806fe447 100644 +--- a/hw/block/dataplane/virtio-blk.h ++++ b/hw/block/dataplane/virtio-blk.h +@@ -28,4 +28,7 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); + int virtio_blk_data_plane_start(VirtIODevice *vdev); + void virtio_blk_data_plane_stop(VirtIODevice *vdev); + ++void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); ++void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); ++ + #endif /* HW_DATAPLANE_VIRTIO_BLK_H */ +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index ec9ed09a6a..46e73b2c96 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1151,6 +1151,7 @@ static void 
virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + return; + } + } ++ + virtio_blk_handle_vq(s, vq); + } + +@@ -1463,6 +1464,68 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, + return 0; + } + ++static bool ++validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, ++ uint16_t num_queues, Error **errp) ++{ ++ g_autofree unsigned long *vqs = bitmap_new(num_queues); ++ g_autoptr(GHashTable) iothreads = ++ g_hash_table_new(g_str_hash, g_str_equal); ++ ++ for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { ++ const char *name = node->value->iothread; ++ uint16List *vq; ++ ++ if (!iothread_by_id(name)) { ++ error_setg(errp, "IOThread \"%s\" object does not exist", name); ++ return false; ++ } ++ ++ if (!g_hash_table_add(iothreads, (gpointer)name)) { ++ error_setg(errp, ++ "duplicate IOThread name \"%s\" in iothread-vq-mapping", ++ name); ++ return false; ++ } ++ ++ if (node != list) { ++ if (!!node->value->vqs != !!list->value->vqs) { ++ error_setg(errp, "either all items in iothread-vq-mapping " ++ "must have vqs or none of them must have it"); ++ return false; ++ } ++ } ++ ++ for (vq = node->value->vqs; vq; vq = vq->next) { ++ if (vq->value >= num_queues) { ++ error_setg(errp, "vq index %u for IOThread \"%s\" must be " ++ "less than num_queues %u in iothread-vq-mapping", ++ vq->value, name, num_queues); ++ return false; ++ } ++ ++ if (test_and_set_bit(vq->value, vqs)) { ++ error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " ++ "because it is already assigned", vq->value, name); ++ return false; ++ } ++ } ++ } ++ ++ if (list->value->vqs) { ++ for (uint16_t i = 0; i < num_queues; i++) { ++ if (!test_bit(i, vqs)) { ++ error_setg(errp, ++ "missing vq %u IOThread assignment in iothread-vq-mapping", ++ i); ++ return false; ++ } ++ } ++ } ++ ++ return true; ++} ++ + static void virtio_resize_cb(void *opaque) + { + VirtIODevice *vdev = opaque; +@@ -1487,34 +1550,24 @@ static void 
virtio_blk_resize(void *opaque) + static void virtio_blk_drained_begin(void *opaque) + { + VirtIOBlock *s = opaque; +- VirtIODevice *vdev = VIRTIO_DEVICE(opaque); +- AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); + + if (!s->dataplane || !s->dataplane_started) { + return; + } + +- for (uint16_t i = 0; i < s->conf.num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_detach_host_notifier(vq, ctx); +- } ++ virtio_blk_data_plane_detach(s->dataplane); + } + + /* Resume virtqueue ioeventfd processing after drain */ + static void virtio_blk_drained_end(void *opaque) + { + VirtIOBlock *s = opaque; +- VirtIODevice *vdev = VIRTIO_DEVICE(opaque); +- AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); + + if (!s->dataplane || !s->dataplane_started) { + return; + } + +- for (uint16_t i = 0; i < s->conf.num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_attach_host_notifier(vq, ctx); +- } ++ virtio_blk_data_plane_attach(s->dataplane); + } + + static const BlockDevOps virtio_block_ops = { +@@ -1600,6 +1653,19 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + return; + } + ++ if (conf->iothread_vq_mapping_list) { ++ if (conf->iothread) { ++ error_setg(errp, "iothread and iothread-vq-mapping properties " ++ "cannot be set at the same time"); ++ return; ++ } ++ ++ if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list, ++ conf->num_queues, errp)) { ++ return; ++ } ++ } ++ + s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, + s->host_features); + virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); +@@ -1702,6 +1768,8 @@ static Property virtio_blk_properties[] = { + DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), + DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, + IOThread *), ++ DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOBlock, ++ conf.iothread_vq_mapping_list), + 
DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, + VIRTIO_BLK_F_DISCARD, true), + DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock, +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index 9881009c22..5e4091e4da 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -21,6 +21,7 @@ + #include "sysemu/block-backend.h" + #include "sysemu/block-ram-registrar.h" + #include "qom/object.h" ++#include "qapi/qapi-types-virtio.h" + + #define TYPE_VIRTIO_BLK "virtio-blk-device" + OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBlock, VIRTIO_BLK) +@@ -37,6 +38,7 @@ struct VirtIOBlkConf + { + BlockConf conf; + IOThread *iothread; ++ IOThreadVirtQueueMappingList *iothread_vq_mapping_list; + char *serial; + uint32_t request_merging; + uint16_t num_queues; +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch b/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch new file mode 100644 index 0000000..31e83a2 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch @@ -0,0 +1,177 @@ +From d54e88103aa76f3bf755b3f4308d8ab60367c6ef Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 14 Sep 2023 10:00:59 -0400 +Subject: [PATCH 074/101] virtio-blk: add lock to protect s->rq + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [5/26] 17dcd5ba18c03e5633a014d8d62d34d8dd7b43bf (kmwolf/centos-qemu-kvm) + +s->rq is accessed from IO_CODE and GLOBAL_STATE_CODE. Introduce a lock +to protect s->rq and eliminate reliance on the AioContext lock. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230914140101.1065008-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 67 +++++++++++++++++++++++----------- + include/hw/virtio/virtio-blk.h | 3 +- + 2 files changed, 47 insertions(+), 23 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index a1f8e15522..ee38e089bc 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -82,8 +82,11 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, + /* Break the link as the next request is going to be parsed from the + * ring again. Otherwise we may end up doing a double completion! */ + req->mr_next = NULL; +- req->next = s->rq; +- s->rq = req; ++ ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ req->next = s->rq; ++ s->rq = req; ++ } + } else if (action == BLOCK_ERROR_ACTION_REPORT) { + virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); + if (acct_failed) { +@@ -1183,10 +1186,13 @@ static void virtio_blk_dma_restart_bh(void *opaque) + { + VirtIOBlock *s = opaque; + +- VirtIOBlockReq *req = s->rq; ++ VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; + +- s->rq = NULL; ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ req = s->rq; ++ s->rq = NULL; ++ } + + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + while (req) { +@@ -1238,22 +1244,29 @@ static void virtio_blk_reset(VirtIODevice *vdev) + AioContext *ctx; + VirtIOBlockReq *req; + ++ /* Dataplane has stopped... */ ++ assert(!s->dataplane_started); ++ ++ /* ...but requests may still be in flight. */ + ctx = blk_get_aio_context(s->blk); + aio_context_acquire(ctx); + blk_drain(s->blk); ++ aio_context_release(ctx); + + /* We drop queued requests after blk_drain() because blk_drain() itself can + * produce them. 
*/ +- while (s->rq) { +- req = s->rq; +- s->rq = req->next; +- virtqueue_detach_element(req->vq, &req->elem, 0); +- virtio_blk_free_request(req); +- } ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ while (s->rq) { ++ req = s->rq; ++ s->rq = req->next; + +- aio_context_release(ctx); ++ /* No other threads can access req->vq here */ ++ virtqueue_detach_element(req->vq, &req->elem, 0); ++ ++ virtio_blk_free_request(req); ++ } ++ } + +- assert(!s->dataplane_started); + blk_set_enable_write_cache(s->blk, s->original_wce); + } + +@@ -1443,18 +1456,22 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) + static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) + { + VirtIOBlock *s = VIRTIO_BLK(vdev); +- VirtIOBlockReq *req = s->rq; + +- while (req) { +- qemu_put_sbyte(f, 1); ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ VirtIOBlockReq *req = s->rq; + +- if (s->conf.num_queues > 1) { +- qemu_put_be32(f, virtio_get_queue_index(req->vq)); +- } ++ while (req) { ++ qemu_put_sbyte(f, 1); + +- qemu_put_virtqueue_element(vdev, f, &req->elem); +- req = req->next; ++ if (s->conf.num_queues > 1) { ++ qemu_put_be32(f, virtio_get_queue_index(req->vq)); ++ } ++ ++ qemu_put_virtqueue_element(vdev, f, &req->elem); ++ req = req->next; ++ } + } ++ + qemu_put_sbyte(f, 0); + } + +@@ -1480,8 +1497,11 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, + + req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); + virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); +- req->next = s->rq; +- s->rq = req; ++ ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ req->next = s->rq; ++ s->rq = req; ++ } + } + + return 0; +@@ -1628,6 +1648,8 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + s->host_features); + virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); + ++ qemu_mutex_init(&s->rq_lock); ++ + s->blk = conf->conf.blk; + s->rq = NULL; + s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; +@@ 
-1679,6 +1701,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + virtio_del_queue(vdev, i); + } + qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); ++ qemu_mutex_destroy(&s->rq_lock); + blk_ram_registrar_destroy(&s->blk_ram_registrar); + qemu_del_vm_change_state_handler(s->change); + blockdev_mark_auto_del(s->blk); +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index dafec432ce..9881009c22 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -54,7 +54,8 @@ struct VirtIOBlockReq; + struct VirtIOBlock { + VirtIODevice parent_obj; + BlockBackend *blk; +- void *rq; ++ QemuMutex rq_lock; ++ void *rq; /* protected by rq_lock */ + VirtIOBlkConf conf; + unsigned short sector_mask; + bool original_wce; +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch b/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch new file mode 100644 index 0000000..a7b518d --- /dev/null +++ b/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch @@ -0,0 +1,63 @@ +From 22730552442003e81c8c508c3e7ebacf647e4e75 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:48 -0500 +Subject: [PATCH 19/22] virtio-blk: always set ioeventfd during startup + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [15/17] 5f7142aeaa54fda41bd5c4fd3222fd8e3e18f370 (stefanha/centos-stream-qemu-kvm) + +When starting ioeventfd it is common practice to set the event notifier +so that the ioeventfd handler is triggered to run immediately. There may +be no requests waiting to be processed, but the idea is that if a +request snuck in then we guarantee that it will be detected. 
+ +One scenario where self-triggering the ioeventfd is necessary is when +virtio_blk_handle_output() is called from a vCPU thread before the +VIRTIO Device Status transitions to DRIVER_OK. In that case we need to +self-trigger the ioeventfd so that the kick handled by the vCPU thread +causes the vq AioContext thread to take over handling the request(s). + +Fixes: b6948ab01df0 ("virtio-blk: add iothread-vq-mapping parameter") +Reported-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-7-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit d3f6f294aeadd5f88caf0155e4360808c95b3146) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 81de06c9f6..0b9100b746 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1809,14 +1809,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + smp_wmb(); /* paired with aio_notify_accept() on the read side */ + + /* Get this show started by hooking up our callbacks */ +- if (!blk_in_drain(s->conf.conf.blk)) { +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- AioContext *ctx = s->vq_aio_context[i]; ++ for (i = 0; i < nvqs; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; + +- /* Kick right away to begin processing requests already in vring */ +- event_notifier_set(virtio_queue_get_host_notifier(vq)); ++ /* Kick right away to begin processing requests already in vring */ ++ event_notifier_set(virtio_queue_get_host_notifier(vq)); + ++ if (!blk_in_drain(s->conf.conf.blk)) { + virtio_queue_aio_attach_host_notifier(vq, ctx); + } + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch b/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch new file mode 
100644 index 0000000..8d93bf6 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch @@ -0,0 +1,72 @@ +From f62b56c68d50a149a07e15797bf3605e63b2c501 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 22 Jan 2024 12:26:25 -0500 +Subject: [PATCH 4/6] virtio-blk: avoid using ioeventfd state in irqfd + conditional + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 224: virtio-blk: avoid using ioeventfd state in irqfd conditional +RH-Jira: RHEL-15394 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/1] 8f24084669db52457e55e2523b9f56f5560dd6ce (stefanha/centos-stream-qemu-kvm) + +Requests that complete in an IOThread use irqfd to notify the guest +while requests that complete in the main loop thread use the traditional +qdev irq code path. The reason for this conditional is that the irq code +path requires the BQL: + + if (s->ioeventfd_started && !s->ioeventfd_disabled) { + virtio_notify_irqfd(vdev, req->vq); + } else { + virtio_notify(vdev, req->vq); + } + +There is a corner case where the conditional invokes the irq code path +instead of the irqfd code path: + + static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) + { + ... + /* + * Set ->ioeventfd_started to false before draining so that host notifiers + * are not detached/attached anymore. + */ + s->ioeventfd_started = false; + + /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ + blk_drain(s->conf.conf.blk); + +During blk_drain() the conditional produces the wrong result because +ioeventfd_started is false. + +Use qemu_in_iothread() instead of checking the ioeventfd state. 
+ +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-15394 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240122172625.415386-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit bfa36802d1704fc413c590ebdcc4e5ae0eacf439) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 7fdeaf2d12..2ae2f6a823 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -66,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) + iov_discard_undo(&req->inhdr_undo); + iov_discard_undo(&req->outhdr_undo); + virtqueue_push(req->vq, &req->elem, req->in_len); +- if (s->ioeventfd_started && !s->ioeventfd_disabled) { ++ if (qemu_in_iothread()) { + virtio_notify_irqfd(vdev, req->vq); + } else { + virtio_notify(vdev, req->vq); +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch new file mode 100644 index 0000000..be3c7db --- /dev/null +++ b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch @@ -0,0 +1,167 @@ +From a2069ff76637365cacf5b96f9427b98a6ca2c9ba Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 14 Sep 2023 10:01:00 -0400 +Subject: [PATCH 075/101] virtio-blk: don't lock AioContext in the completion + code path + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [6/26] 3426f62c2156f6967bb4ffbce75a4ff46d3312a3 (kmwolf/centos-qemu-kvm) + +Nothing in the completion code path relies on the AioContext lock +anymore. Virtqueues are only accessed from one thread at any moment and +the s->rq global state is protected by its own lock now. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230914140101.1065008-4-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 34 ++++------------------------------ + 1 file changed, 4 insertions(+), 30 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index ee38e089bc..f5315df042 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -105,7 +105,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) + VirtIOBlock *s = next->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + while (next) { + VirtIOBlockReq *req = next; + next = req->mr_next; +@@ -138,7 +137,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); + } +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + static void virtio_blk_flush_complete(void *opaque, int ret) +@@ -146,19 +144,13 @@ static void virtio_blk_flush_complete(void *opaque, int ret) + VirtIOBlockReq *req = opaque; + VirtIOBlock *s = req->dev; + +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); +- if (ret) { +- if (virtio_blk_handle_rw_error(req, -ret, 0, true)) { +- goto out; +- } ++ if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) { ++ return; + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); +- +-out: +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) +@@ -168,11 +160,8 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) + bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) & + ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; + +- 
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); +- if (ret) { +- if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { +- goto out; +- } ++ if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { ++ return; + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); +@@ -180,9 +169,6 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->blk), &req->acct); + } + virtio_blk_free_request(req); +- +-out: +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + #ifdef __linux__ +@@ -229,10 +215,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int status) + virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); + + out: +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + g_free(ioctl_req); + } + +@@ -672,7 +656,6 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) + { + ZoneCmdData *data = opaque; + VirtIOBlockReq *req = data->req; +- VirtIOBlock *s = req->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); + struct iovec *in_iov = data->in_iov; + unsigned in_num = data->in_num; +@@ -763,10 +746,8 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) + } + + out: +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + g_free(data->zone_report_data.zones); + g_free(data); + } +@@ -829,10 +810,8 @@ static void virtio_blk_zone_mgmt_complete(void *opaque, int ret) + err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; + } + +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + static 
int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op) +@@ -882,7 +861,6 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) + { + ZoneCmdData *data = opaque; + VirtIOBlockReq *req = data->req; +- VirtIOBlock *s = req->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); + int64_t append_sector, n; + uint8_t err_status = VIRTIO_BLK_S_OK; +@@ -905,10 +883,8 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) + trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret); + + out: +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + g_free(data); + } + +@@ -944,10 +920,8 @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req, + return 0; + + out: +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + return err_status; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch new file mode 100644 index 0000000..c31fcca --- /dev/null +++ b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch @@ -0,0 +1,67 @@ +From 2816f6ce20c496e21947f215112be34a5cb93606 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 14 Sep 2023 10:01:01 -0400 +Subject: [PATCH 076/101] virtio-blk: don't lock AioContext in the submission + code path + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [7/26] e0de2744cb319569ea008334e45ee5fc2ba9b6d7 (kmwolf/centos-qemu-kvm) + +There is no need to acquire the AioContext lock around blk_aio_*() or +blk_get_geometry() anymore. 
I/O plugging (defer_call()) also does not +require the AioContext lock anymore. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230914140101.1065008-5-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index f5315df042..e110f9718b 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1111,7 +1111,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + MultiReqBuffer mrb = {}; + bool suppress_notifications = virtio_queue_get_notification(vq); + +- aio_context_acquire(blk_get_aio_context(s->blk)); + defer_call_begin(); + + do { +@@ -1137,7 +1136,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + } + + defer_call_end(); +- aio_context_release(blk_get_aio_context(s->blk)); + } + + static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -1168,7 +1166,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) + s->rq = NULL; + } + +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + while (req) { + VirtIOBlockReq *next = req->next; + if (virtio_blk_handle_request(req, &mrb)) { +@@ -1192,8 +1189,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) + + /* Paired with inc in virtio_blk_dma_restart_cb() */ + blk_dec_in_flight(s->conf.conf.blk); +- +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + static void virtio_blk_dma_restart_cb(void *opaque, bool running, +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch b/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch new file mode 100644 index 0000000..3fb8211 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch @@ -0,0 +1,1009 @@ +From d9be1e1f199ee3171455636f32f3ba59b57e9351 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: 
Fri, 19 Jan 2024 08:57:43 -0500 +Subject: [PATCH 14/22] virtio-blk: move dataplane code into virtio-blk.c + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [10/17] ad854c6c7e808da272bd07229e8c915c1ee6f296 (stefanha/centos-stream-qemu-kvm) + +The dataplane code used to be significantly different from the +non-dataplane code and therefore had a separate source file. + +Over time the difference has gotten smaller because the I/O code paths +were unified. Nowadays the distinction between the VirtIOBlock and +VirtIOBlockDataPlane structs is more of an inconvenience that hinders +code simplification. + +Move hw/block/dataplane/virtio-blk.c into hw/block/virtio-blk.c, merging +VirtIOBlockDataPlane's fields into VirtIOBlock. + +hw/block/virtio-blk.c used VirtIOBlock->dataplane to check if +virtio_blk_data_plane_create() was successful. This is not necessary +because ->dataplane_started and ->dataplane_disabled can be used +instead. This patch makes those changes in order to drop +VirtIOBlock->dataplane. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 3bcc17f06526754fd675dcf601414442044fa0b6) +Signed-off-by: Stefan Hajnoczi + +Conflicts: + hw/block/dataplane/virtio-blk.c + Downstream is missing commit 0b2675c473f6 ("Rename "QEMU global mutex" + to "BQL" in comments and docs") so the source file still contains old + "QEMU global mutex held" comments instead of the new "BQL held" + phrasing. The code moved into hw/block/virtio-blk.c by this patch uses + the new "BQL held" phrasing so as to minimize conflicts in future + backports. Either way, this is not a code change and therefore no risk + of introducing bugs.
+--- + hw/block/dataplane/meson.build | 1 - + hw/block/dataplane/trace-events | 5 - + hw/block/dataplane/trace.h | 1 - + hw/block/dataplane/virtio-blk.c | 404 -------------------------------- + hw/block/dataplane/virtio-blk.h | 34 --- + hw/block/virtio-blk.c | 362 ++++++++++++++++++++++++++-- + include/hw/virtio/virtio-blk.h | 12 +- + meson.build | 1 - + 8 files changed, 357 insertions(+), 463 deletions(-) + delete mode 100644 hw/block/dataplane/trace-events + delete mode 100644 hw/block/dataplane/trace.h + delete mode 100644 hw/block/dataplane/virtio-blk.c + delete mode 100644 hw/block/dataplane/virtio-blk.h + +diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build +index 025b3b061b..11a5eba2f4 100644 +--- a/hw/block/dataplane/meson.build ++++ b/hw/block/dataplane/meson.build +@@ -1,2 +1 @@ +-system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) + specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) +diff --git a/hw/block/dataplane/trace-events b/hw/block/dataplane/trace-events +deleted file mode 100644 +index 38fc3e7507..0000000000 +--- a/hw/block/dataplane/trace-events ++++ /dev/null +@@ -1,5 +0,0 @@ +-# See docs/devel/tracing.rst for syntax documentation. +- +-# virtio-blk.c +-virtio_blk_data_plane_start(void *s) "dataplane %p" +-virtio_blk_data_plane_stop(void *s) "dataplane %p" +diff --git a/hw/block/dataplane/trace.h b/hw/block/dataplane/trace.h +deleted file mode 100644 +index 240cc59834..0000000000 +--- a/hw/block/dataplane/trace.h ++++ /dev/null +@@ -1 +0,0 @@ +-#include "trace/trace-hw_block_dataplane.h" +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +deleted file mode 100644 +index 97a302cf49..0000000000 +--- a/hw/block/dataplane/virtio-blk.c ++++ /dev/null +@@ -1,404 +0,0 @@ +-/* +- * Dedicated thread for virtio-blk I/O processing +- * +- * Copyright 2012 IBM, Corp. +- * Copyright 2012 Red Hat, Inc. 
and/or its affiliates +- * +- * Authors: +- * Stefan Hajnoczi +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. +- * +- */ +- +-#include "qemu/osdep.h" +-#include "qapi/error.h" +-#include "trace.h" +-#include "qemu/iov.h" +-#include "qemu/main-loop.h" +-#include "qemu/thread.h" +-#include "qemu/error-report.h" +-#include "hw/virtio/virtio-blk.h" +-#include "virtio-blk.h" +-#include "block/aio.h" +-#include "hw/virtio/virtio-bus.h" +-#include "qom/object_interfaces.h" +- +-struct VirtIOBlockDataPlane { +- bool starting; +- bool stopping; +- +- VirtIOBlkConf *conf; +- VirtIODevice *vdev; +- +- /* +- * The AioContext for each virtqueue. The BlockDriverState will use the +- * first element as its AioContext. +- */ +- AioContext **vq_aio_context; +-}; +- +-/* Raise an interrupt to signal guest, if necessary */ +-void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) +-{ +- virtio_notify_irqfd(s->vdev, vq); +-} +- +-/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ +-static void +-apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, +- AioContext **vq_aio_context, uint16_t num_queues) +-{ +- IOThreadVirtQueueMappingList *node; +- size_t num_iothreads = 0; +- size_t cur_iothread = 0; +- +- for (node = iothread_vq_mapping_list; node; node = node->next) { +- num_iothreads++; +- } +- +- for (node = iothread_vq_mapping_list; node; node = node->next) { +- IOThread *iothread = iothread_by_id(node->value->iothread); +- AioContext *ctx = iothread_get_aio_context(iothread); +- +- /* Released in virtio_blk_data_plane_destroy() */ +- object_ref(OBJECT(iothread)); +- +- if (node->value->vqs) { +- uint16List *vq; +- +- /* Explicit vq:IOThread assignment */ +- for (vq = node->value->vqs; vq; vq = vq->next) { +- vq_aio_context[vq->value] = ctx; +- } +- } else { +- /* Round-robin vq:IOThread assignment */ +- for (unsigned i = 
cur_iothread; i < num_queues; +- i += num_iothreads) { +- vq_aio_context[i] = ctx; +- } +- } +- +- cur_iothread++; +- } +-} +- +-/* Context: QEMU global mutex held */ +-bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, +- VirtIOBlockDataPlane **dataplane, +- Error **errp) +-{ +- VirtIOBlockDataPlane *s; +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- +- *dataplane = NULL; +- +- if (conf->iothread || conf->iothread_vq_mapping_list) { +- if (!k->set_guest_notifiers || !k->ioeventfd_assign) { +- error_setg(errp, +- "device is incompatible with iothread " +- "(transport does not support notifiers)"); +- return false; +- } +- if (!virtio_device_ioeventfd_enabled(vdev)) { +- error_setg(errp, "ioeventfd is required for iothread"); +- return false; +- } +- +- /* If dataplane is (re-)enabled while the guest is running there could +- * be block jobs that can conflict. +- */ +- if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { +- error_prepend(errp, "cannot start virtio-blk dataplane: "); +- return false; +- } +- } +- /* Don't try if transport does not support notifiers. 
*/ +- if (!virtio_device_ioeventfd_enabled(vdev)) { +- return false; +- } +- +- s = g_new0(VirtIOBlockDataPlane, 1); +- s->vdev = vdev; +- s->conf = conf; +- s->vq_aio_context = g_new(AioContext *, conf->num_queues); +- +- if (conf->iothread_vq_mapping_list) { +- apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, +- conf->num_queues); +- } else if (conf->iothread) { +- AioContext *ctx = iothread_get_aio_context(conf->iothread); +- for (unsigned i = 0; i < conf->num_queues; i++) { +- s->vq_aio_context[i] = ctx; +- } +- +- /* Released in virtio_blk_data_plane_destroy() */ +- object_ref(OBJECT(conf->iothread)); +- } else { +- AioContext *ctx = qemu_get_aio_context(); +- for (unsigned i = 0; i < conf->num_queues; i++) { +- s->vq_aio_context[i] = ctx; +- } +- } +- +- *dataplane = s; +- +- return true; +-} +- +-/* Context: QEMU global mutex held */ +-void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) +-{ +- VirtIOBlock *vblk; +- VirtIOBlkConf *conf; +- +- if (!s) { +- return; +- } +- +- vblk = VIRTIO_BLK(s->vdev); +- assert(!vblk->dataplane_started); +- conf = s->conf; +- +- if (conf->iothread_vq_mapping_list) { +- IOThreadVirtQueueMappingList *node; +- +- for (node = conf->iothread_vq_mapping_list; node; node = node->next) { +- IOThread *iothread = iothread_by_id(node->value->iothread); +- object_unref(OBJECT(iothread)); +- } +- } +- +- if (conf->iothread) { +- object_unref(OBJECT(conf->iothread)); +- } +- +- g_free(s->vq_aio_context); +- g_free(s); +-} +- +-/* Context: QEMU global mutex held */ +-int virtio_blk_data_plane_start(VirtIODevice *vdev) +-{ +- VirtIOBlock *vblk = VIRTIO_BLK(vdev); +- VirtIOBlockDataPlane *s = vblk->dataplane; +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); +- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- unsigned i; +- unsigned nvqs = s->conf->num_queues; +- Error *local_err = NULL; +- int r; +- +- if (vblk->dataplane_started || s->starting) { +- return 0; +- } +- +- s->starting = true; +- +- /* Set 
up guest notifier (irq) */ +- r = k->set_guest_notifiers(qbus->parent, nvqs, true); +- if (r != 0) { +- error_report("virtio-blk failed to set guest notifier (%d), " +- "ensure -accel kvm is set.", r); +- goto fail_guest_notifiers; +- } +- +- /* +- * Batch all the host notifiers in a single transaction to avoid +- * quadratic time complexity in address_space_update_ioeventfds(). +- */ +- memory_region_transaction_begin(); +- +- /* Set up virtqueue notify */ +- for (i = 0; i < nvqs; i++) { +- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); +- if (r != 0) { +- int j = i; +- +- fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); +- while (i--) { +- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); +- } +- +- /* +- * The transaction expects the ioeventfds to be open when it +- * commits. Do it now, before the cleanup loop. +- */ +- memory_region_transaction_commit(); +- +- while (j--) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); +- } +- goto fail_host_notifiers; +- } +- } +- +- memory_region_transaction_commit(); +- +- trace_virtio_blk_data_plane_start(s); +- +- r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], +- &local_err); +- if (r < 0) { +- error_report_err(local_err); +- goto fail_aio_context; +- } +- +- /* +- * These fields must be visible to the IOThread when it processes the +- * virtqueue, otherwise it will think dataplane has not started yet. +- * +- * Make sure ->dataplane_started is false when blk_set_aio_context() is +- * called above so that draining does not cause the host notifier to be +- * detached/attached prematurely. 
+- */ +- s->starting = false; +- vblk->dataplane_started = true; +- smp_wmb(); /* paired with aio_notify_accept() on the read side */ +- +- /* Get this show started by hooking up our callbacks */ +- if (!blk_in_drain(s->conf->conf.blk)) { +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(s->vdev, i); +- AioContext *ctx = s->vq_aio_context[i]; +- +- /* Kick right away to begin processing requests already in vring */ +- event_notifier_set(virtio_queue_get_host_notifier(vq)); +- +- virtio_queue_aio_attach_host_notifier(vq, ctx); +- } +- } +- return 0; +- +- fail_aio_context: +- memory_region_transaction_begin(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); +- } +- +- memory_region_transaction_commit(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); +- } +- fail_host_notifiers: +- k->set_guest_notifiers(qbus->parent, nvqs, false); +- fail_guest_notifiers: +- vblk->dataplane_disabled = true; +- s->starting = false; +- return -ENOSYS; +-} +- +-/* Stop notifications for new requests from guest. +- * +- * Context: BH in IOThread +- */ +-static void virtio_blk_data_plane_stop_vq_bh(void *opaque) +-{ +- VirtQueue *vq = opaque; +- EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); +- +- virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); +- +- /* +- * Test and clear notifier after disabling event, in case poll callback +- * didn't have time to run. 
+- */ +- virtio_queue_host_notifier_read(host_notifier); +-} +- +-/* Context: QEMU global mutex held */ +-void virtio_blk_data_plane_stop(VirtIODevice *vdev) +-{ +- VirtIOBlock *vblk = VIRTIO_BLK(vdev); +- VirtIOBlockDataPlane *s = vblk->dataplane; +- BusState *qbus = qdev_get_parent_bus(DEVICE(vblk)); +- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- unsigned i; +- unsigned nvqs = s->conf->num_queues; +- +- if (!vblk->dataplane_started || s->stopping) { +- return; +- } +- +- /* Better luck next time. */ +- if (vblk->dataplane_disabled) { +- vblk->dataplane_disabled = false; +- vblk->dataplane_started = false; +- return; +- } +- s->stopping = true; +- trace_virtio_blk_data_plane_stop(s); +- +- if (!blk_in_drain(s->conf->conf.blk)) { +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(s->vdev, i); +- AioContext *ctx = s->vq_aio_context[i]; +- +- aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); +- } +- } +- +- /* +- * Batch all the host notifiers in a single transaction to avoid +- * quadratic time complexity in address_space_update_ioeventfds(). +- */ +- memory_region_transaction_begin(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); +- } +- +- /* +- * The transaction expects the ioeventfds to be open when it +- * commits. Do it now, before the cleanup loop. +- */ +- memory_region_transaction_commit(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); +- } +- +- /* +- * Set ->dataplane_started to false before draining so that host notifiers +- * are not detached/attached anymore. +- */ +- vblk->dataplane_started = false; +- +- /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ +- blk_drain(s->conf->conf.blk); +- +- /* +- * Try to switch bs back to the QEMU main loop. 
If other users keep the +- * BlockBackend in the iothread, that's ok +- */ +- blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); +- +- /* Clean up guest notifier (irq) */ +- k->set_guest_notifiers(qbus->parent, nvqs, false); +- +- s->stopping = false; +-} +- +-void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) +-{ +- VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); +- +- for (uint16_t i = 0; i < s->conf->num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); +- } +-} +- +-void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) +-{ +- VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); +- +- for (uint16_t i = 0; i < s->conf->num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); +- } +-} +diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h +deleted file mode 100644 +index 1a806fe447..0000000000 +--- a/hw/block/dataplane/virtio-blk.h ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* +- * Dedicated thread for virtio-blk I/O processing +- * +- * Copyright 2012 IBM, Corp. +- * Copyright 2012 Red Hat, Inc. and/or its affiliates +- * +- * Authors: +- * Stefan Hajnoczi +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- * +- */ +- +-#ifndef HW_DATAPLANE_VIRTIO_BLK_H +-#define HW_DATAPLANE_VIRTIO_BLK_H +- +-#include "hw/virtio/virtio.h" +- +-typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane; +- +-bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, +- VirtIOBlockDataPlane **dataplane, +- Error **errp); +-void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); +-void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); +- +-int virtio_blk_data_plane_start(VirtIODevice *vdev); +-void virtio_blk_data_plane_stop(VirtIODevice *vdev); +- +-void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); +-void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); +- +-#endif /* HW_DATAPLANE_VIRTIO_BLK_H */ +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 46e73b2c96..cb623069f8 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -27,7 +27,6 @@ + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" + #include "hw/virtio/virtio-blk.h" +-#include "dataplane/virtio-blk.h" + #include "scsi/constants.h" + #ifdef __linux__ + # include +@@ -66,7 +65,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) + iov_discard_undo(&req->outhdr_undo); + virtqueue_push(req->vq, &req->elem, req->in_len); + if (s->dataplane_started && !s->dataplane_disabled) { +- virtio_blk_data_plane_notify(s->dataplane, req->vq); ++ virtio_notify_irqfd(vdev, req->vq); + } else { + virtio_notify(vdev, req->vq); + } +@@ -1142,7 +1141,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOBlock *s = (VirtIOBlock *)vdev; + +- if (s->dataplane && !s->dataplane_started) { ++ if (!s->dataplane_disabled && !s->dataplane_started) { + /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start + * dataplane here instead of waiting for .set_status(). 
+ */ +@@ -1546,16 +1545,34 @@ static void virtio_blk_resize(void *opaque) + aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); + } + ++static void virtio_blk_data_plane_detach(VirtIOBlock *s) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s); ++ ++ for (uint16_t i = 0; i < s->conf.num_queues; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); ++ } ++} ++ ++static void virtio_blk_data_plane_attach(VirtIOBlock *s) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s); ++ ++ for (uint16_t i = 0; i < s->conf.num_queues; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); ++ } ++} ++ + /* Suspend virtqueue ioeventfd processing during drain */ + static void virtio_blk_drained_begin(void *opaque) + { + VirtIOBlock *s = opaque; + +- if (!s->dataplane || !s->dataplane_started) { +- return; ++ if (s->dataplane_started) { ++ virtio_blk_data_plane_detach(s); + } +- +- virtio_blk_data_plane_detach(s->dataplane); + } + + /* Resume virtqueue ioeventfd processing after drain */ +@@ -1563,11 +1580,9 @@ static void virtio_blk_drained_end(void *opaque) + { + VirtIOBlock *s = opaque; + +- if (!s->dataplane || !s->dataplane_started) { +- return; ++ if (s->dataplane_started) { ++ virtio_blk_data_plane_attach(s); + } +- +- virtio_blk_data_plane_attach(s->dataplane); + } + + static const BlockDevOps virtio_block_ops = { +@@ -1576,6 +1591,326 @@ static const BlockDevOps virtio_block_ops = { + .drained_end = virtio_blk_drained_end, + }; + ++/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ ++static void ++apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, ++ AioContext **vq_aio_context, uint16_t num_queues) ++{ ++ IOThreadVirtQueueMappingList *node; ++ size_t num_iothreads = 0; ++ size_t cur_iothread = 0; ++ ++ for (node = iothread_vq_mapping_list; node; node = node->next) { ++ num_iothreads++; 
++ } ++ ++ for (node = iothread_vq_mapping_list; node; node = node->next) { ++ IOThread *iothread = iothread_by_id(node->value->iothread); ++ AioContext *ctx = iothread_get_aio_context(iothread); ++ ++ /* Released in virtio_blk_data_plane_destroy() */ ++ object_ref(OBJECT(iothread)); ++ ++ if (node->value->vqs) { ++ uint16List *vq; ++ ++ /* Explicit vq:IOThread assignment */ ++ for (vq = node->value->vqs; vq; vq = vq->next) { ++ vq_aio_context[vq->value] = ctx; ++ } ++ } else { ++ /* Round-robin vq:IOThread assignment */ ++ for (unsigned i = cur_iothread; i < num_queues; ++ i += num_iothreads) { ++ vq_aio_context[i] = ctx; ++ } ++ } ++ ++ cur_iothread++; ++ } ++} ++ ++/* Context: BQL held */ ++static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s); ++ VirtIOBlkConf *conf = &s->conf; ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ ++ if (conf->iothread || conf->iothread_vq_mapping_list) { ++ if (!k->set_guest_notifiers || !k->ioeventfd_assign) { ++ error_setg(errp, ++ "device is incompatible with iothread " ++ "(transport does not support notifiers)"); ++ return false; ++ } ++ if (!virtio_device_ioeventfd_enabled(vdev)) { ++ error_setg(errp, "ioeventfd is required for iothread"); ++ return false; ++ } ++ ++ /* ++ * If dataplane is (re-)enabled while the guest is running there could ++ * be block jobs that can conflict. ++ */ ++ if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { ++ error_prepend(errp, "cannot start virtio-blk dataplane: "); ++ return false; ++ } ++ } ++ /* Don't try if transport does not support notifiers. 
*/ ++ if (!virtio_device_ioeventfd_enabled(vdev)) { ++ s->dataplane_disabled = true; ++ return false; ++ } ++ ++ s->vq_aio_context = g_new(AioContext *, conf->num_queues); ++ ++ if (conf->iothread_vq_mapping_list) { ++ apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, ++ conf->num_queues); ++ } else if (conf->iothread) { ++ AioContext *ctx = iothread_get_aio_context(conf->iothread); ++ for (unsigned i = 0; i < conf->num_queues; i++) { ++ s->vq_aio_context[i] = ctx; ++ } ++ ++ /* Released in virtio_blk_data_plane_destroy() */ ++ object_ref(OBJECT(conf->iothread)); ++ } else { ++ AioContext *ctx = qemu_get_aio_context(); ++ for (unsigned i = 0; i < conf->num_queues; i++) { ++ s->vq_aio_context[i] = ctx; ++ } ++ } ++ ++ return true; ++} ++ ++/* Context: BQL held */ ++static void virtio_blk_data_plane_destroy(VirtIOBlock *s) ++{ ++ VirtIOBlkConf *conf = &s->conf; ++ ++ assert(!s->dataplane_started); ++ ++ if (conf->iothread_vq_mapping_list) { ++ IOThreadVirtQueueMappingList *node; ++ ++ for (node = conf->iothread_vq_mapping_list; node; node = node->next) { ++ IOThread *iothread = iothread_by_id(node->value->iothread); ++ object_unref(OBJECT(iothread)); ++ } ++ } ++ ++ if (conf->iothread) { ++ object_unref(OBJECT(conf->iothread)); ++ } ++ ++ g_free(s->vq_aio_context); ++ s->vq_aio_context = NULL; ++} ++ ++/* Context: BQL held */ ++static int virtio_blk_data_plane_start(VirtIODevice *vdev) ++{ ++ VirtIOBlock *s = VIRTIO_BLK(vdev); ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ unsigned i; ++ unsigned nvqs = s->conf.num_queues; ++ Error *local_err = NULL; ++ int r; ++ ++ if (s->dataplane_started || s->dataplane_starting) { ++ return 0; ++ } ++ ++ s->dataplane_starting = true; ++ ++ /* Set up guest notifier (irq) */ ++ r = k->set_guest_notifiers(qbus->parent, nvqs, true); ++ if (r != 0) { ++ error_report("virtio-blk failed to set guest notifier (%d), " ++ "ensure -accel kvm is set.", r); ++ 
goto fail_guest_notifiers; ++ } ++ ++ /* ++ * Batch all the host notifiers in a single transaction to avoid ++ * quadratic time complexity in address_space_update_ioeventfds(). ++ */ ++ memory_region_transaction_begin(); ++ ++ /* Set up virtqueue notify */ ++ for (i = 0; i < nvqs; i++) { ++ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); ++ if (r != 0) { ++ int j = i; ++ ++ fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); ++ while (i--) { ++ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ /* ++ * The transaction expects the ioeventfds to be open when it ++ * commits. Do it now, before the cleanup loop. ++ */ ++ memory_region_transaction_commit(); ++ ++ while (j--) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); ++ } ++ goto fail_host_notifiers; ++ } ++ } ++ ++ memory_region_transaction_commit(); ++ ++ r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], ++ &local_err); ++ if (r < 0) { ++ error_report_err(local_err); ++ goto fail_aio_context; ++ } ++ ++ /* ++ * These fields must be visible to the IOThread when it processes the ++ * virtqueue, otherwise it will think dataplane has not started yet. ++ * ++ * Make sure ->dataplane_started is false when blk_set_aio_context() is ++ * called above so that draining does not cause the host notifier to be ++ * detached/attached prematurely. 
++ */ ++ s->dataplane_starting = false; ++ s->dataplane_started = true; ++ smp_wmb(); /* paired with aio_notify_accept() on the read side */ ++ ++ /* Get this show started by hooking up our callbacks */ ++ if (!blk_in_drain(s->conf.conf.blk)) { ++ for (i = 0; i < nvqs; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; ++ ++ /* Kick right away to begin processing requests already in vring */ ++ event_notifier_set(virtio_queue_get_host_notifier(vq)); ++ ++ virtio_queue_aio_attach_host_notifier(vq, ctx); ++ } ++ } ++ return 0; ++ ++ fail_aio_context: ++ memory_region_transaction_begin(); ++ ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); ++ } ++ fail_host_notifiers: ++ k->set_guest_notifiers(qbus->parent, nvqs, false); ++ fail_guest_notifiers: ++ s->dataplane_disabled = true; ++ s->dataplane_starting = false; ++ return -ENOSYS; ++} ++ ++/* Stop notifications for new requests from guest. ++ * ++ * Context: BH in IOThread ++ */ ++static void virtio_blk_data_plane_stop_vq_bh(void *opaque) ++{ ++ VirtQueue *vq = opaque; ++ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); ++ ++ virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); ++ ++ /* ++ * Test and clear notifier after disabling event, in case poll callback ++ * didn't have time to run. ++ */ ++ virtio_queue_host_notifier_read(host_notifier); ++} ++ ++/* Context: BQL held */ ++static void virtio_blk_data_plane_stop(VirtIODevice *vdev) ++{ ++ VirtIOBlock *s = VIRTIO_BLK(vdev); ++ BusState *qbus = qdev_get_parent_bus(DEVICE(s)); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ unsigned i; ++ unsigned nvqs = s->conf.num_queues; ++ ++ if (!s->dataplane_started || s->dataplane_stopping) { ++ return; ++ } ++ ++ /* Better luck next time. 
*/ ++ if (s->dataplane_disabled) { ++ s->dataplane_disabled = false; ++ s->dataplane_started = false; ++ return; ++ } ++ s->dataplane_stopping = true; ++ ++ if (!blk_in_drain(s->conf.conf.blk)) { ++ for (i = 0; i < nvqs; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; ++ ++ aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); ++ } ++ } ++ ++ /* ++ * Batch all the host notifiers in a single transaction to avoid ++ * quadratic time complexity in address_space_update_ioeventfds(). ++ */ ++ memory_region_transaction_begin(); ++ ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ /* ++ * The transaction expects the ioeventfds to be open when it ++ * commits. Do it now, before the cleanup loop. ++ */ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); ++ } ++ ++ /* ++ * Set ->dataplane_started to false before draining so that host notifiers ++ * are not detached/attached anymore. ++ */ ++ s->dataplane_started = false; ++ ++ /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ ++ blk_drain(s->conf.conf.blk); ++ ++ /* ++ * Try to switch bs back to the QEMU main loop. 
If other users keep the ++ * BlockBackend in the iothread, that's ok ++ */ ++ blk_set_aio_context(s->conf.conf.blk, qemu_get_aio_context(), NULL); ++ ++ /* Clean up guest notifier (irq) */ ++ k->set_guest_notifiers(qbus->parent, nvqs, false); ++ ++ s->dataplane_stopping = false; ++} ++ + static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); +@@ -1680,7 +2015,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); + } + qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); +- virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); ++ virtio_blk_data_plane_create(s, &err); + if (err != NULL) { + error_propagate(errp, err); + for (i = 0; i < conf->num_queues; i++) { +@@ -1717,8 +2052,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + + blk_drain(s->blk); + del_boot_device_lchs(dev, "/disk@0,0"); +- virtio_blk_data_plane_destroy(s->dataplane); +- s->dataplane = NULL; ++ virtio_blk_data_plane_destroy(s); + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index 5e4091e4da..fecffdc303 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -50,8 +50,6 @@ struct VirtIOBlkConf + bool x_enable_wce_if_config_wce; + }; + +-struct VirtIOBlockDataPlane; +- + struct VirtIOBlockReq; + struct VirtIOBlock { + VirtIODevice parent_obj; +@@ -64,7 +62,15 @@ struct VirtIOBlock { + VMChangeStateEntry *change; + bool dataplane_disabled; + bool dataplane_started; +- struct VirtIOBlockDataPlane *dataplane; ++ bool dataplane_starting; ++ bool dataplane_stopping; ++ ++ /* ++ * The AioContext for each virtqueue. The BlockDriverState will use the ++ * first element as its AioContext. 
++ */ ++ AioContext **vq_aio_context; ++ + uint64_t host_features; + size_t config_size; + BlockRAMRegistrar blk_ram_registrar; +diff --git a/meson.build b/meson.build +index 6c77d9687d..47c65d0f53 100644 +--- a/meson.build ++++ b/meson.build +@@ -3298,7 +3298,6 @@ if have_system + 'hw/arm', + 'hw/audio', + 'hw/block', +- 'hw/block/dataplane', + 'hw/char', + 'hw/display', + 'hw/dma', +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch b/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch new file mode 100644 index 0000000..5f45b9d --- /dev/null +++ b/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch @@ -0,0 +1,117 @@ +From 71257c2f320f1511de1e275779cf4b90effc1f02 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:44 -0500 +Subject: [PATCH 15/22] virtio-blk: rename dataplane create/destroy functions + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [11/17] 60e7016d5f3e4e9e89945578279b12f812f85ddf (stefanha/centos-stream-qemu-kvm) + +virtio_blk_data_plane_create() and virtio_blk_data_plane_destroy() are +actually about s->vq_aio_context[] rather than managing +dataplane-specific state. + +As a prerequisite to using s->vq_aio_context[] in all code paths (even +when dataplane is not used), rename these functions to reflect that they +just manage s->vq_aio_context and call them regardless of whether or not +dataplane is in use. + +Note that virtio-blk supports running with -device +virtio-blk-pci,ioevent=off where the vCPU thread enters the device +emulation code. In this mode ioeventfd is not used for virtqueue +processing. However, we still want to initialize s->vq_aio_context[] to +qemu_aio_context in that case since I/O completion callbacks will be +invoked in the main loop thread. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 57bc2658935778d1ae0edbcd4402763da8c7bae2) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index cb623069f8..4d6f9377c6 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1608,7 +1608,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, + IOThread *iothread = iothread_by_id(node->value->iothread); + AioContext *ctx = iothread_get_aio_context(iothread); + +- /* Released in virtio_blk_data_plane_destroy() */ ++ /* Released in virtio_blk_vq_aio_context_cleanup() */ + object_ref(OBJECT(iothread)); + + if (node->value->vqs) { +@@ -1631,7 +1631,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, + } + + /* Context: BQL held */ +-static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) ++static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + VirtIOBlkConf *conf = &s->conf; +@@ -1659,11 +1659,6 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) + return false; + } + } +- /* Don't try if transport does not support notifiers. 
*/ +- if (!virtio_device_ioeventfd_enabled(vdev)) { +- s->dataplane_disabled = true; +- return false; +- } + + s->vq_aio_context = g_new(AioContext *, conf->num_queues); + +@@ -1676,7 +1671,7 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) + s->vq_aio_context[i] = ctx; + } + +- /* Released in virtio_blk_data_plane_destroy() */ ++ /* Released in virtio_blk_vq_aio_context_cleanup() */ + object_ref(OBJECT(conf->iothread)); + } else { + AioContext *ctx = qemu_get_aio_context(); +@@ -1689,7 +1684,7 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) + } + + /* Context: BQL held */ +-static void virtio_blk_data_plane_destroy(VirtIOBlock *s) ++static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) + { + VirtIOBlkConf *conf = &s->conf; + +@@ -2015,7 +2010,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); + } + qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); +- virtio_blk_data_plane_create(s, &err); ++ ++ /* Don't start dataplane if transport does not support notifiers. 
*/ ++ if (!virtio_device_ioeventfd_enabled(vdev)) { ++ s->dataplane_disabled = true; ++ } ++ ++ virtio_blk_vq_aio_context_init(s, &err); + if (err != NULL) { + error_propagate(errp, err); + for (i = 0; i < conf->num_queues; i++) { +@@ -2052,7 +2053,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + + blk_drain(s->blk); + del_boot_device_lchs(dev, "/disk@0,0"); +- virtio_blk_data_plane_destroy(s); ++ virtio_blk_vq_aio_context_cleanup(s); + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch b/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch new file mode 100644 index 0000000..a0c0b67 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch @@ -0,0 +1,307 @@ +From ba80cdcd5604b9b9efc4682ade9828ab74ebf5e6 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:45 -0500 +Subject: [PATCH 16/22] virtio-blk: rename dataplane to ioeventfd + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [12/17] 4230005e0d1b4629fe4540f1f63cd705e58618da (stefanha/centos-stream-qemu-kvm) + +The dataplane code is really about using ioeventfd. It's used both for +IOThreads (what we think of as dataplane) and for the core virtio-pci +code's ioeventfd feature (which is enabled by default and used when no +IOThread has been specified). Rename the code to reflect this. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-4-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 3cdaf3dd4a4ca94ebabe7eab23b432f1a6c547cc) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 78 +++++++++++++++++----------------- + include/hw/virtio/virtio-blk.h | 8 ++-- + 2 files changed, 43 insertions(+), 43 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 4d6f9377c6..08c566946a 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -64,7 +64,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) + iov_discard_undo(&req->inhdr_undo); + iov_discard_undo(&req->outhdr_undo); + virtqueue_push(req->vq, &req->elem, req->in_len); +- if (s->dataplane_started && !s->dataplane_disabled) { ++ if (s->ioeventfd_started && !s->ioeventfd_disabled) { + virtio_notify_irqfd(vdev, req->vq); + } else { + virtio_notify(vdev, req->vq); +@@ -1141,12 +1141,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOBlock *s = (VirtIOBlock *)vdev; + +- if (!s->dataplane_disabled && !s->dataplane_started) { ++ if (!s->ioeventfd_disabled && !s->ioeventfd_started) { + /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start +- * dataplane here instead of waiting for .set_status(). ++ * ioeventfd here instead of waiting for .set_status(). + */ + virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_disabled) { ++ if (!s->ioeventfd_disabled) { + return; + } + } +@@ -1213,7 +1213,7 @@ static void virtio_blk_reset(VirtIODevice *vdev) + VirtIOBlockReq *req; + + /* Dataplane has stopped... */ +- assert(!s->dataplane_started); ++ assert(!s->ioeventfd_started); + + /* ...but requests may still be in flight. 
*/ + blk_drain(s->blk); +@@ -1380,7 +1380,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) + VirtIOBlock *s = VIRTIO_BLK(vdev); + + if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { +- assert(!s->dataplane_started); ++ assert(!s->ioeventfd_started); + } + + if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { +@@ -1545,7 +1545,7 @@ static void virtio_blk_resize(void *opaque) + aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); + } + +-static void virtio_blk_data_plane_detach(VirtIOBlock *s) ++static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + +@@ -1555,7 +1555,7 @@ static void virtio_blk_data_plane_detach(VirtIOBlock *s) + } + } + +-static void virtio_blk_data_plane_attach(VirtIOBlock *s) ++static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + +@@ -1570,8 +1570,8 @@ static void virtio_blk_drained_begin(void *opaque) + { + VirtIOBlock *s = opaque; + +- if (s->dataplane_started) { +- virtio_blk_data_plane_detach(s); ++ if (s->ioeventfd_started) { ++ virtio_blk_ioeventfd_detach(s); + } + } + +@@ -1580,8 +1580,8 @@ static void virtio_blk_drained_end(void *opaque) + { + VirtIOBlock *s = opaque; + +- if (s->dataplane_started) { +- virtio_blk_data_plane_attach(s); ++ if (s->ioeventfd_started) { ++ virtio_blk_ioeventfd_attach(s); + } + } + +@@ -1651,11 +1651,11 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) + } + + /* +- * If dataplane is (re-)enabled while the guest is running there could ++ * If ioeventfd is (re-)enabled while the guest is running there could + * be block jobs that can conflict. 
+ */ + if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { +- error_prepend(errp, "cannot start virtio-blk dataplane: "); ++ error_prepend(errp, "cannot start virtio-blk ioeventfd: "); + return false; + } + } +@@ -1688,7 +1688,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) + { + VirtIOBlkConf *conf = &s->conf; + +- assert(!s->dataplane_started); ++ assert(!s->ioeventfd_started); + + if (conf->iothread_vq_mapping_list) { + IOThreadVirtQueueMappingList *node; +@@ -1708,7 +1708,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) + } + + /* Context: BQL held */ +-static int virtio_blk_data_plane_start(VirtIODevice *vdev) ++static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + { + VirtIOBlock *s = VIRTIO_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); +@@ -1718,11 +1718,11 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) + Error *local_err = NULL; + int r; + +- if (s->dataplane_started || s->dataplane_starting) { ++ if (s->ioeventfd_started || s->ioeventfd_starting) { + return 0; + } + +- s->dataplane_starting = true; ++ s->ioeventfd_starting = true; + + /* Set up guest notifier (irq) */ + r = k->set_guest_notifiers(qbus->parent, nvqs, true); +@@ -1773,14 +1773,14 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) + + /* + * These fields must be visible to the IOThread when it processes the +- * virtqueue, otherwise it will think dataplane has not started yet. ++ * virtqueue, otherwise it will think ioeventfd has not started yet. + * +- * Make sure ->dataplane_started is false when blk_set_aio_context() is ++ * Make sure ->ioeventfd_started is false when blk_set_aio_context() is + * called above so that draining does not cause the host notifier to be + * detached/attached prematurely. 
+ */ +- s->dataplane_starting = false; +- s->dataplane_started = true; ++ s->ioeventfd_starting = false; ++ s->ioeventfd_started = true; + smp_wmb(); /* paired with aio_notify_accept() on the read side */ + + /* Get this show started by hooking up our callbacks */ +@@ -1812,8 +1812,8 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) + fail_host_notifiers: + k->set_guest_notifiers(qbus->parent, nvqs, false); + fail_guest_notifiers: +- s->dataplane_disabled = true; +- s->dataplane_starting = false; ++ s->ioeventfd_disabled = true; ++ s->ioeventfd_starting = false; + return -ENOSYS; + } + +@@ -1821,7 +1821,7 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) + * + * Context: BH in IOThread + */ +-static void virtio_blk_data_plane_stop_vq_bh(void *opaque) ++static void virtio_blk_ioeventfd_stop_vq_bh(void *opaque) + { + VirtQueue *vq = opaque; + EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); +@@ -1836,7 +1836,7 @@ static void virtio_blk_data_plane_stop_vq_bh(void *opaque) + } + + /* Context: BQL held */ +-static void virtio_blk_data_plane_stop(VirtIODevice *vdev) ++static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) + { + VirtIOBlock *s = VIRTIO_BLK(vdev); + BusState *qbus = qdev_get_parent_bus(DEVICE(s)); +@@ -1844,24 +1844,24 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) + unsigned i; + unsigned nvqs = s->conf.num_queues; + +- if (!s->dataplane_started || s->dataplane_stopping) { ++ if (!s->ioeventfd_started || s->ioeventfd_stopping) { + return; + } + + /* Better luck next time. 
*/ +- if (s->dataplane_disabled) { +- s->dataplane_disabled = false; +- s->dataplane_started = false; ++ if (s->ioeventfd_disabled) { ++ s->ioeventfd_disabled = false; ++ s->ioeventfd_started = false; + return; + } +- s->dataplane_stopping = true; ++ s->ioeventfd_stopping = true; + + if (!blk_in_drain(s->conf.conf.blk)) { + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); + AioContext *ctx = s->vq_aio_context[i]; + +- aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); ++ aio_wait_bh_oneshot(ctx, virtio_blk_ioeventfd_stop_vq_bh, vq); + } + } + +@@ -1886,10 +1886,10 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) + } + + /* +- * Set ->dataplane_started to false before draining so that host notifiers ++ * Set ->ioeventfd_started to false before draining so that host notifiers + * are not detached/attached anymore. + */ +- s->dataplane_started = false; ++ s->ioeventfd_started = false; + + /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ + blk_drain(s->conf.conf.blk); +@@ -1903,7 +1903,7 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, nvqs, false); + +- s->dataplane_stopping = false; ++ s->ioeventfd_stopping = false; + } + + static void virtio_blk_device_realize(DeviceState *dev, Error **errp) +@@ -2011,9 +2011,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + } + qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); + +- /* Don't start dataplane if transport does not support notifiers. */ ++ /* Don't start ioeventfd if transport does not support notifiers. 
*/ + if (!virtio_device_ioeventfd_enabled(vdev)) { +- s->dataplane_disabled = true; ++ s->ioeventfd_disabled = true; + } + + virtio_blk_vq_aio_context_init(s, &err); +@@ -2137,8 +2137,8 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) + vdc->reset = virtio_blk_reset; + vdc->save = virtio_blk_save_device; + vdc->load = virtio_blk_load_device; +- vdc->start_ioeventfd = virtio_blk_data_plane_start; +- vdc->stop_ioeventfd = virtio_blk_data_plane_stop; ++ vdc->start_ioeventfd = virtio_blk_start_ioeventfd; ++ vdc->stop_ioeventfd = virtio_blk_stop_ioeventfd; + } + + static const TypeInfo virtio_blk_info = { +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index fecffdc303..833a9a344f 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -60,10 +60,10 @@ struct VirtIOBlock { + unsigned short sector_mask; + bool original_wce; + VMChangeStateEntry *change; +- bool dataplane_disabled; +- bool dataplane_started; +- bool dataplane_starting; +- bool dataplane_stopping; ++ bool ioeventfd_disabled; ++ bool ioeventfd_started; ++ bool ioeventfd_starting; ++ bool ioeventfd_stopping; + + /* + * The AioContext for each virtqueue. 
The BlockDriverState will use the +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch b/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch new file mode 100644 index 0000000..611b881 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch @@ -0,0 +1,106 @@ +From 9311035821b3fea3f78c7f06ddb8a3861584f907 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:46 -0500 +Subject: [PATCH 17/22] virtio-blk: restart s->rq reqs in vq AioContexts + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [13/17] cf5ad0352a78458ffc7588f967963f62b267fd64 (stefanha/centos-stream-qemu-kvm) + +A virtio-blk device with the iothread-vq-mapping parameter has +per-virtqueue AioContexts. It is not thread-safe to process s->rq +requests in the BlockBackend AioContext since that may be different from +the virtqueue's AioContext to which this request belongs. The code +currently races and could crash. + +Adapt virtio_blk_dma_restart_cb() to first split s->rq into per-vq lists +and then schedule a BH each vq's AioContext as necessary. This way +requests are safely processed in their vq's AioContext. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-5-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 71ee0cdd14cc01a8b51aa4e9577dd0a1bb2f8e19) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 44 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 33 insertions(+), 11 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 08c566946a..f48ce5cbb8 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1156,16 +1156,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + + static void virtio_blk_dma_restart_bh(void *opaque) + { +- VirtIOBlock *s = opaque; ++ VirtIOBlockReq *req = opaque; ++ VirtIOBlock *s = req->dev; /* we're called with at least one request */ + +- VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; + +- WITH_QEMU_LOCK_GUARD(&s->rq_lock) { +- req = s->rq; +- s->rq = NULL; +- } +- + while (req) { + VirtIOBlockReq *next = req->next; + if (virtio_blk_handle_request(req, &mrb)) { +@@ -1195,16 +1190,43 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, + RunState state) + { + VirtIOBlock *s = opaque; ++ uint16_t num_queues = s->conf.num_queues; + + if (!running) { + return; + } + +- /* Paired with dec in virtio_blk_dma_restart_bh() */ +- blk_inc_in_flight(s->conf.conf.blk); ++ /* Split the device-wide s->rq request list into per-vq request lists */ ++ g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); ++ VirtIOBlockReq *rq; ++ ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ rq = s->rq; ++ s->rq = NULL; ++ } ++ ++ while (rq) { ++ VirtIOBlockReq *next = rq->next; ++ uint16_t idx = virtio_get_queue_index(rq->vq); ++ ++ rq->next = vq_rq[idx]; ++ vq_rq[idx] = rq; ++ rq = next; ++ } + +- aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.conf.blk), +- virtio_blk_dma_restart_bh, s); ++ /* Schedule a BH to submit the requests in each vq's AioContext */ ++ for (uint16_t i = 
0; i < num_queues; i++) { ++ if (!vq_rq[i]) { ++ continue; ++ } ++ ++ /* Paired with dec in virtio_blk_dma_restart_bh() */ ++ blk_inc_in_flight(s->conf.conf.blk); ++ ++ aio_bh_schedule_oneshot(s->vq_aio_context[i], ++ virtio_blk_dma_restart_bh, ++ vq_rq[i]); ++ } + } + + static void virtio_blk_reset(VirtIODevice *vdev) +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch b/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch new file mode 100644 index 0000000..303c007 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch @@ -0,0 +1,72 @@ +From 282cebc22987958d11efc76e4f6ddb9601e709d9 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:47 -0500 +Subject: [PATCH 18/22] virtio-blk: tolerate failure to set BlockBackend + AioContext + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [14/17] edb113ce9fea0c1a88ae7b5d61c35c1981e6993f (stefanha/centos-stream-qemu-kvm) + +We no longer rely on setting the AioContext since the block layer +IO_CODE APIs can be called from any thread. Now it's just a hint to help +block jobs and other operations co-locate themselves in a thread with +the guest I/O requests. Keep going if setting the AioContext fails. 
+ +Suggested-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-6-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit ea0736d7f84ead109a6b701427991828f97724c3) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 19 +++++-------------- + 1 file changed, 5 insertions(+), 14 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index f48ce5cbb8..81de06c9f6 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1786,11 +1786,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + + memory_region_transaction_commit(); + ++ /* ++ * Try to change the AioContext so that block jobs and other operations can ++ * co-locate their activity in the same AioContext. If it fails, nevermind. ++ */ + r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], + &local_err); + if (r < 0) { +- error_report_err(local_err); +- goto fail_aio_context; ++ warn_report_err(local_err); + } + + /* +@@ -1819,18 +1822,6 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + } + return 0; + +- fail_aio_context: +- memory_region_transaction_begin(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); +- } +- +- memory_region_transaction_commit(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); +- } + fail_host_notifiers: + k->set_guest_notifiers(qbus->parent, nvqs, false); + fail_guest_notifiers: +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch b/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch new file mode 100644 index 0000000..1f70049 --- /dev/null +++ b/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch @@ -0,0 +1,87 @@ +From 5db0b4131c56d96760b3300298f4bedab99d35cb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 6 Sep 2023 17:00:22 +0400 +Subject: 
[PATCH 100/101] virtio-gpu: block migration of VMs with blob=true +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 217: virtio-gpu: block migration of VMs with blob=true +RH-Jira: RHEL-7565 +RH-Commit: [1/1] f978ca697d574b1419eb027a1007c060dfb83298 + +JIRA: https://issues.redhat.com/browse/RHEL-7565 + +commit 9c549ab6895a43ad0cb33e684e11cdb0b5400897 +Author: Marc-André Lureau +Date: Wed Sep 6 17:00:22 2023 +0400 + +virtio-gpu: block migration of VMs with blob=true + +"blob" resources don't have an associated pixman image: + +#0 pixman_image_get_stride (image=0x0) at ../pixman/pixman-image.c:921 +#1 0x0000562327c25236 in virtio_gpu_save (f=0x56232bb13b00, opaque=0x56232b555a60, size=0, field=0x5623289ab6c8 <__compound_literal.3+104>, vmdesc=0x56232ab59fe0) at ../hw/display/virtio-gpu.c:1225 + +Related to: +https://bugzilla.redhat.com/show_bug.cgi?id=2236353 + +Signed-off-by: Marc-André Lureau +Acked-by: Peter Xu + +[ rhel backport - fix Error* vs Error** argument ] +Signed-off-by: Marc-André Lureau +--- + hw/display/virtio-gpu.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index b016d3bac8..1702190ead 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -27,6 +27,7 @@ + #include "hw/virtio/virtio-gpu-pixman.h" + #include "hw/virtio/virtio-bus.h" + #include "hw/qdev-properties.h" ++#include "migration/blocker.h" + #include "qemu/log.h" + #include "qemu/module.h" + #include "qapi/error.h" +@@ -41,6 +42,8 @@ virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, + + static void virtio_gpu_reset_bh(void *opaque); + ++static Error *blob_mig_blocker; ++ + void virtio_gpu_update_cursor_data(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id) +@@ -1452,6 +1455,14 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) + error_setg(errp, "blobs 
and virgl are not compatible (yet)"); + return; + } ++ ++ if (!blob_mig_blocker) { ++ error_setg(&blob_mig_blocker, ++ "virtio-gpu blob VMs are currently not migratable."); ++ } ++ if (migrate_add_blocker(&blob_mig_blocker, errp)) { ++ return; ++ } + } + + if (!virtio_gpu_base_device_realize(qdev, +@@ -1478,6 +1489,9 @@ static void virtio_gpu_device_unrealize(DeviceState *qdev) + { + VirtIOGPU *g = VIRTIO_GPU(qdev); + ++ if (virtio_gpu_blob_enabled(g->parent_obj.conf)) { ++ migrate_del_blocker(&blob_mig_blocker); ++ } + g_clear_pointer(&g->ctrl_bh, qemu_bh_delete); + g_clear_pointer(&g->cursor_bh, qemu_bh_delete); + g_clear_pointer(&g->reset_bh, qemu_bh_delete); +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch b/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch deleted file mode 100644 index acfb3ae..0000000 --- a/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 08c8af80dbd03b46a6a8397ef0c41cda3e6de22c Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 5 Jul 2023 18:51:17 +0200 -Subject: [PATCH 01/37] virtio-iommu: Fix 64kB host page size VFIO device - assignment - -RH-Author: Eric Auger -RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes -RH-Bugzilla: 2211609 2211634 -RH-Acked-by: Gavin Shan -RH-Acked-by: Sebastian Ott -RH-Commit: [1/2] b48db1c964559505dda4c6c9a3b79d68207b25eb (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211634 - -When running on a 64kB page size host and protecting a VFIO device -with the virtio-iommu, qemu crashes with this kind of message: - -qemu-kvm: virtio-iommu page mask 0xfffffffffffff000 is incompatible -with mask 0x20010000 -qemu: hardware error: vfio: DMA mapping failed, unable to continue - -This is due to the fact the IOMMU MR corresponding to the VFIO device -is enabled very late on domain attach, after the machine init. 
-The device reports a minimal 64kB page size but it is too late to be -applied. virtio_iommu_set_page_size_mask() fails and this causes -vfio_listener_region_add() to end up with hw_error(); - -To work around this issue, we transiently enable the IOMMU MR on -machine init to collect the page size requirements and then restore -the bypass state. - -Fixes: 90519b9053 ("virtio-iommu: Add bypass mode support to assigned device") -Signed-off-by: Eric Auger - -Message-Id: <20230705165118.28194-2-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -Tested-by: Jean-Philippe Brucker -Reviewed-by: Zhenzhong Duan -(cherry picked from commit 94df5b2180d61fb2ee2b04cc007981e58b6479a9) -Signed-off-by: Eric Auger ---- - hw/virtio/trace-events | 1 + - hw/virtio/virtio-iommu.c | 31 +++++++++++++++++++++++++++++-- - include/hw/virtio/virtio-iommu.h | 2 ++ - 3 files changed, 32 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 8f8d05cf9b..68b752e304 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -131,6 +131,7 @@ virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "m - virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" - virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" - virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" -+virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 - - # virtio-mem.c - virtio_mem_send_response(uint16_t type) "type=%" PRIu16 -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 1cd258135d..542679b321 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -24,6 +24,7 @@ - #include "hw/virtio/virtio.h" - #include "sysemu/kvm.h" - #include "sysemu/reset.h" -+#include "sysemu/sysemu.h" - 
#include "qapi/error.h" - #include "qemu/error-report.h" - #include "trace.h" -@@ -1106,12 +1107,12 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - } - - /* -- * After the machine is finalized, we can't change the mask anymore. If by -+ * Once the granule is frozen we can't change the mask anymore. If by - * chance the hotplugged device supports the same granule, we can still - * accept it. Having a different masks is possible but the guest will use - * sub-optimal block sizes, so warn about it. - */ -- if (phase_check(PHASE_MACHINE_READY)) { -+ if (s->granule_frozen) { - int new_granule = ctz64(new_mask); - int cur_granule = ctz64(cur_mask); - -@@ -1146,6 +1147,28 @@ static void virtio_iommu_system_reset(void *opaque) - - } - -+static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) -+{ -+ VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); -+ int granule; -+ -+ if (likely(s->config.bypass)) { -+ /* -+ * Transient IOMMU MR enable to collect page_size_mask requirements -+ * through memory_region_iommu_set_page_size_mask() called by -+ * VFIO region_add() callback -+ */ -+ s->config.bypass = false; -+ virtio_iommu_switch_address_space_all(s); -+ /* restore default */ -+ s->config.bypass = true; -+ virtio_iommu_switch_address_space_all(s); -+ } -+ s->granule_frozen = true; -+ granule = ctz64(s->config.page_size_mask); -+ trace_virtio_iommu_freeze_granule(BIT(granule)); -+} -+ - static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(dev); -@@ -1189,6 +1212,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) - error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); - } - -+ s->machine_done.notify = virtio_iommu_freeze_granule; -+ qemu_add_machine_init_done_notifier(&s->machine_done); -+ - qemu_register_reset(virtio_iommu_system_reset, s); - } - -@@ -1198,6 +1224,7 @@ static void virtio_iommu_device_unrealize(DeviceState 
*dev) - VirtIOIOMMU *s = VIRTIO_IOMMU(dev); - - qemu_unregister_reset(virtio_iommu_system_reset, s); -+ qemu_remove_machine_init_done_notifier(&s->machine_done); - - g_hash_table_destroy(s->as_by_busptr); - if (s->domains) { -diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h -index 2ad5ee320b..a93fc5383e 100644 ---- a/include/hw/virtio/virtio-iommu.h -+++ b/include/hw/virtio/virtio-iommu.h -@@ -61,6 +61,8 @@ struct VirtIOIOMMU { - QemuRecMutex mutex; - GTree *endpoints; - bool boot_bypass; -+ Notifier machine_done; -+ bool granule_frozen; - }; - - #endif --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch b/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch deleted file mode 100644 index 7934a12..0000000 --- a/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 643d93343759a350fe0f6327d308bf6a93c79d25 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 5 Jul 2023 18:51:18 +0200 -Subject: [PATCH 02/37] virtio-iommu: Rework the traces in - virtio_iommu_set_page_size_mask() - -RH-Author: Eric Auger -RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes -RH-Bugzilla: 2211609 2211634 -RH-Acked-by: Gavin Shan -RH-Acked-by: Sebastian Ott -RH-Commit: [2/2] 0af7078dde158f07c83e2b293adc5d9d475688ae (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211609 - -The current error messages in virtio_iommu_set_page_size_mask() -sound quite similar for different situations and miss the IOMMU -memory region that causes the issue. - -Clarify them and rework the comment. - -Also remove the trace when the new page_size_mask is not applied as -the current frozen granule is kept. This message is rather confusing -for the end user and anyway the current granule would have been used -by the driver. 
- -Signed-off-by: Eric Auger -Reviewed-by: Zhenzhong Duan -Message-Id: <20230705165118.28194-3-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -Tested-by: Jean-Philippe Brucker -(cherry picked from commit 587a7641d53055054d68d67d94c9408ef808f127) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 19 +++++++------------ - 1 file changed, 7 insertions(+), 12 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 542679b321..421e2a944f 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -1101,29 +1101,24 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - new_mask); - - if ((cur_mask & new_mask) == 0) { -- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 -- " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask); -+ error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 -+ " incompatible with currently supported mask 0x%"PRIx64, -+ mr->parent_obj.name, new_mask, cur_mask); - return -1; - } - - /* - * Once the granule is frozen we can't change the mask anymore. If by - * chance the hotplugged device supports the same granule, we can still -- * accept it. Having a different masks is possible but the guest will use -- * sub-optimal block sizes, so warn about it. -+ * accept it. 
- */ - if (s->granule_frozen) { -- int new_granule = ctz64(new_mask); - int cur_granule = ctz64(cur_mask); - -- if (new_granule != cur_granule) { -- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 -- " is incompatible with mask 0x%"PRIx64, cur_mask, -- new_mask); -+ if (!(BIT(cur_granule) & new_mask)) { -+ error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", -+ mr->parent_obj.name, BIT_ULL(cur_granule)); - return -1; -- } else if (new_mask != cur_mask) { -- warn_report("virtio-iommu page mask 0x%"PRIx64 -- " does not match 0x%"PRIx64, cur_mask, new_mask); - } - return 0; - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch b/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch deleted file mode 100644 index 638ae98..0000000 --- a/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 59cd85621b1b14ada843ea0562cc76b6a7c93df4 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 18 Jul 2023 20:21:36 +0200 -Subject: [PATCH 08/14] virtio-iommu: Standardize granule extraction and - formatting - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas Huth -RH-Acked-by: Peter Xu -RH-Commit: [2/3] 48784ef2a19174518f66479dcb532230bffe8bf1 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -At several locations we compute the granule from the config -page_size_mask using ctz() and then format it in traces using -BIT(). As the page_size_mask is 64b we should use ctz64 and -BIT_ULL() for formatting. We failed to be consistent. - -Note the page_size_mask is garanteed to be non null. The spec -mandates the device to set at least one bit, so ctz64 cannot -return 64. 
This is garanteed by the fact the device -initializes the page_size_mask to qemu_target_page_mask() -and then the page_size_mask is further constrained by -virtio_iommu_set_page_size_mask() callback which can't -result in a new mask being null. So if Coverity complains -round those ctz64/BIT_ULL with CID 1517772 this is a false -positive - -Signed-off-by: Eric Auger -Fixes: 94df5b2180 ("virtio-iommu: Fix 64kB host page size VFIO device assignment") -Message-Id: <20230718182136.40096-1-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -(cherry picked from commit 1084feddc6a677cdfdde56936bfb97cf32cc4dee) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 17ce630200..17b3dcd158 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -854,17 +854,19 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - VirtIOIOMMUEndpoint *ep; - uint32_t sid, flags; - bool bypass_allowed; -+ int granule; - bool found; - int i; - - interval.low = addr; - interval.high = addr + 1; -+ granule = ctz64(s->config.page_size_mask); - - IOMMUTLBEntry entry = { - .target_as = &address_space_memory, - .iova = addr, - .translated_addr = addr, -- .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1, -+ .addr_mask = BIT_ULL(granule) - 1, - .perm = IOMMU_NONE, - }; - -@@ -1117,7 +1119,7 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - if (s->granule_frozen) { - int cur_granule = ctz64(cur_mask); - -- if (!(BIT(cur_granule) & new_mask)) { -+ if (!(BIT_ULL(cur_granule) & new_mask)) { - error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", - mr->parent_obj.name, BIT_ULL(cur_granule)); - return -1; -@@ -1163,7 +1165,7 @@ static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) 
- } - s->granule_frozen = true; - granule = ctz64(s->config.page_size_mask); -- trace_virtio_iommu_freeze_granule(BIT(granule)); -+ trace_virtio_iommu_freeze_granule(BIT_ULL(granule)); - } - - static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch b/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch new file mode 100644 index 0000000..6ad1c98 --- /dev/null +++ b/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch @@ -0,0 +1,70 @@ +From 94bccae527f1ab8328cc7692532046d700e2ca71 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 5 Feb 2024 19:27:07 +0100 +Subject: [PATCH 22/22] virtio-mem: default-enable "dynamic-memslots" + +RH-Author: David Hildenbrand +RH-MergeRequest: 220: virtio-mem: default-enable "dynamic-memslots" +RH-Jira: RHEL-24045 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] d9a60acd7de1d8703ea3ca938e388e19f31f5347 + +JIRA: https://issues.redhat.com/browse/RHEL-24045 +Upstream: RHEL only + +We only support selected vhost-user devices in combination with +virtio-mem in RHEL. One device that works well is virtiofsd; devices that +are currently incompatible include DPDK and SPDK. + +The vhost devices we support must be compatible with the dynamic-memslot +feature (i.e., support at least 509 memslots, support dynamically adding/ +removing memslots), such that setting "dynamic-memslots=on" will work as +expected and not make certain QEMU commandlines or hotplug of vhost-user +devices bail out. + +Let's set "dynamic-memslots=on" starting with RHEL 9.4, so we +get the benefits (i.e., reduced metadata consumption in KVM, majority of +unplugged memory being inaccessible) as default. + +When wanting to run virtio-mem with incompatible vhost-user devices, it +might just work (if the vhost-user device is created before the +virtio-mem device), or the feature can be manually disabled by +specifying "dynamic-memslots=off". 
+ +Signed-off-by: David Hildenbrand +--- + hw/core/machine.c | 2 ++ + hw/virtio/virtio-mem.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 446601ee30..309f6ba685 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -78,6 +78,8 @@ GlobalProperty hw_compat_rhel_9_4[] = { + { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, + /* hw_compat_rhel_9_4 from hw_compat_8_1 */ + { "igb", "x-pcie-flr-init", "off" }, ++ /* hw_compat_rhel_9_4 jira RHEL-24045 */ ++ { "virtio-mem", "dynamic-memslots", "off" }, + }; + const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); + +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index 75ee38aa46..00ca91e8fe 100644 +--- a/hw/virtio/virtio-mem.c ++++ b/hw/virtio/virtio-mem.c +@@ -1696,8 +1696,9 @@ static Property virtio_mem_properties[] = { + #endif + DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, + early_migration, true), ++ /* RHEL: default-enable "dynamic-memslots" (jira RHEL-24045) */ + DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, +- dynamic_memslots, false), ++ dynamic_memslots, true), + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch b/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch deleted file mode 100644 index 119ea84..0000000 --- a/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 4fe096a6fad61ab721fd29324d48383c7f427ac9 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Mon, 5 Jun 2023 16:21:25 +0200 -Subject: [PATCH 7/9] virtio-net: correctly report maximum tx_queue_size value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 191: virtio-net: correctly report maximum tx_queue_size value -RH-Bugzilla: 2040509 -RH-Acked-by: Stefano 
Garzarella -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/1] afb944c6d75fe476ac86fe267b1cca5f272dfbbd (lvivier/qemu-kvm-centos) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040509 - -Maximum value for tx_queue_size depends on the backend type. -1024 for vDPA/vhost-user, 256 for all the others. - -The value is returned by virtio_net_max_tx_queue_size() to set the -parameter: - - n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), - n->net_conf.tx_queue_size); - -But the parameter checking uses VIRTQUEUE_MAX_SIZE (1024). - -So the parameter is silently ignored and ethtool reports a different -value than the one provided by the user. - - ... -netdev tap,... -device virtio-net,tx_queue_size=1024 - - # ethtool -g enp0s2 - Ring parameters for enp0s2: - Pre-set maximums: - RX: 256 - RX Mini: n/a - RX Jumbo: n/a - TX: 256 - Current hardware settings: - RX: 256 - RX Mini: n/a - RX Jumbo: n/a - TX: 256 - - ... -netdev vhost-user,... -device virtio-net,tx_queue_size=2048 - - Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 - -With this patch the correct maximum value is checked and displayed. 
- -For vDPA/vhost-user: - - Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 - -For all the others: - - Invalid tx_queue_size (= 512), must be a power of 2 between 256 and 256 - -Fixes: 2eef278b9e63 ("virtio-net: fix tx queue size for !vhost-user") -Cc: mst@redhat.com -Cc: qemu-stable@nongnu.org -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 4271f4038372f174dbafffacca1a748d058a03ba) ---- - hw/net/virtio-net.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 447f669921..ae1e6a5e3d 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3628,12 +3628,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) - } - - if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || -- n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || -+ n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || - !is_power_of_2(n->net_conf.tx_queue_size)) { - error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " - "must be a power of 2 between %d and %d", - n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, -- VIRTQUEUE_MAX_SIZE); -+ virtio_net_max_tx_queue_size(n)); - virtio_cleanup(vdev); - return; - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch b/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch new file mode 100644 index 0000000..b8066b2 --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch @@ -0,0 +1,78 @@ +From da3a5afa41790ae913d41cfcdc3c6a8731ae3fe8 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 16:31:56 +0100 +Subject: [PATCH 1/6] virtio-scsi: Attach event vq notifier with no_poll + +RH-Author: Hanna Czenczek +RH-MergeRequest: 223: virtio: Re-enable notifications after drain +RH-Jira: RHEL-3934 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/3] 
d29b461a0a4b584af0ee80fb3f9e45c92ea88eb0 (hreitz/qemu-kvm-c-9-s) + +As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi: +don't waste CPU polling the event virtqueue"), we only attach an io_read +notifier for the virtio-scsi event virtqueue instead, and no polling +notifiers. During operation, the event virtqueue is typically +non-empty, but none of the buffers are intended to be used immediately. +Instead, they only get used when certain events occur. Therefore, it +makes no sense to continuously poll it when non-empty, because it is +supposed to be and stay non-empty. + +We do this by using virtio_queue_aio_attach_host_notifier_no_poll() +instead of virtio_queue_aio_attach_host_notifier() for the event +virtqueue. + +Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement +BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use +virtio_queue_aio_attach_host_notifier() for all virtqueues, including +the event virtqueue. This can lead to it being polled again, undoing +the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552. + +Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the +event virtqueue. 
+ +Reported-by: Fiona Ebner +Fixes: 766aa2de0f29b657148e04599320d771c36fd126 + ("virtio-scsi: implement BlockDevOps->drained_begin()") +Reviewed-by: Stefan Hajnoczi +Tested-by: Fiona Ebner +Reviewed-by: Fiona Ebner +Signed-off-by: Hanna Czenczek +Message-ID: <20240202153158.788922-2-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit c42c3833e0cfdf2b80fb3ca410acfd392b6874ab) +--- + hw/scsi/virtio-scsi.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index ca365a70e9..9943186917 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -1149,6 +1149,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus) + static void virtio_scsi_drained_end(SCSIBus *bus) + { + VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); ++ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIODevice *vdev = VIRTIO_DEVICE(s); + uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + + s->parent_obj.conf.num_queues; +@@ -1166,7 +1167,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus) + + for (uint32_t i = 0; i < total_queues; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_attach_host_notifier(vq, s->ctx); ++ if (vq == vs->event_vq) { ++ virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx); ++ } else { ++ virtio_queue_aio_attach_host_notifier(vq, s->ctx); ++ } + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch b/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch new file mode 100644 index 0000000..9ad8fdf --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch @@ -0,0 +1,58 @@ +From 1ee3f919a51135a0798a14c734ca80d74d30025d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Dec 2023 11:42:57 -0500 +Subject: [PATCH 078/101] virtio-scsi: don't lock AioContext around + 
virtio_queue_aio_attach_host_notifier() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [9/26] 5e1179e617d05bf765b285ba42393ec1ddbeba28 (kmwolf/centos-qemu-kvm) + +virtio_queue_aio_attach_host_notifier() does not require the AioContext +lock. Stop taking the lock and add an explicit smp_wmb() because we were +relying on the implicit barrier in the AioContext lock before. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231204164259.1515217-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/scsi/virtio-scsi-dataplane.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index 1e684beebe..135e23fe54 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -149,23 +149,17 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + + memory_region_transaction_commit(); + +- /* +- * These fields are visible to the IOThread so we rely on implicit barriers +- * in aio_context_acquire() on the write side and aio_notify_accept() on +- * the read side. 
+- */ + s->dataplane_starting = false; + s->dataplane_started = true; ++ smp_wmb(); /* paired with aio_notify_accept() */ + + if (s->bus.drain_count == 0) { +- aio_context_acquire(s->ctx); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); + virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); + + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); + } +- aio_context_release(s->ctx); + } + return 0; + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch b/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch new file mode 100644 index 0000000..2654cb7 --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch @@ -0,0 +1,173 @@ +From c2d7633ead6e19d4b6af5552ca907ae071b8734b Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:19:58 -0500 +Subject: [PATCH 081/101] virtio-scsi: replace AioContext lock with tmf_bh_lock + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [12/26] 8fb375bfd72a491d47321c78078577071a4e90fb (kmwolf/centos-qemu-kvm) + +Protect the Task Management Function BH state with a lock. The TMF BH +runs in the main loop thread. An IOThread might process a TMF at the +same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh +must be protected by a lock. + +Run TMF request completion in the IOThread using aio_wait_bh_oneshot(). +This avoids more locking to protect the virtqueue and SCSI layer state. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231205182011.1976568-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/scsi/virtio-scsi.c | 62 ++++++++++++++++++++++----------- + include/hw/virtio/virtio-scsi.h | 3 +- + 2 files changed, 43 insertions(+), 22 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 9c751bf296..4f8d35facc 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -123,6 +123,30 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) + virtio_scsi_free_req(req); + } + ++static void virtio_scsi_complete_req_bh(void *opaque) ++{ ++ VirtIOSCSIReq *req = opaque; ++ ++ virtio_scsi_complete_req(req); ++} ++ ++/* ++ * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop ++ * thread cannot touch the virtqueue since that could race with an IOThread. ++ */ ++static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ ++ if (!s->ctx || s->ctx == qemu_get_aio_context()) { ++ /* No need to schedule a BH when there is no IOThread */ ++ virtio_scsi_complete_req(req); ++ } else { ++ /* Run request completion in the IOThread */ ++ aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req); ++ } ++} ++ + static void virtio_scsi_bad_req(VirtIOSCSIReq *req) + { + virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers"); +@@ -338,10 +362,7 @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) + + out: + object_unref(OBJECT(d)); +- +- virtio_scsi_acquire(s); +- virtio_scsi_complete_req(req); +- virtio_scsi_release(s); ++ virtio_scsi_complete_req_from_main_loop(req); + } + + /* Some TMFs must be processed from the main loop thread */ +@@ -354,18 +375,16 @@ static void virtio_scsi_do_tmf_bh(void *opaque) + + GLOBAL_STATE_CODE(); + +- virtio_scsi_acquire(s); ++ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ 
QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ QTAILQ_INSERT_TAIL(&reqs, req, next); ++ } + +- QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { +- QTAILQ_REMOVE(&s->tmf_bh_list, req, next); +- QTAILQ_INSERT_TAIL(&reqs, req, next); ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; + } + +- qemu_bh_delete(s->tmf_bh); +- s->tmf_bh = NULL; +- +- virtio_scsi_release(s); +- + QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { + QTAILQ_REMOVE(&reqs, req, next); + virtio_scsi_do_one_tmf_bh(req); +@@ -379,8 +398,7 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) + + GLOBAL_STATE_CODE(); + +- virtio_scsi_acquire(s); +- ++ /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */ + if (s->tmf_bh) { + qemu_bh_delete(s->tmf_bh); + s->tmf_bh = NULL; +@@ -393,19 +411,19 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) + req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; + virtio_scsi_complete_req(req); + } +- +- virtio_scsi_release(s); + } + + static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) + { + VirtIOSCSI *s = req->dev; + +- QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); ++ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { ++ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); + +- if (!s->tmf_bh) { +- s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); +- qemu_bh_schedule(s->tmf_bh); ++ if (!s->tmf_bh) { ++ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); ++ qemu_bh_schedule(s->tmf_bh); ++ } + } + } + +@@ -1235,6 +1253,7 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) + Error *err = NULL; + + QTAILQ_INIT(&s->tmf_bh_list); ++ qemu_mutex_init(&s->tmf_bh_lock); + + virtio_scsi_common_realize(dev, + virtio_scsi_handle_ctrl, +@@ -1277,6 +1296,7 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) + + qbus_set_hotplug_handler(BUS(&s->bus), NULL); + virtio_scsi_common_unrealize(dev); ++ qemu_mutex_destroy(&s->tmf_bh_lock); + } + + static Property virtio_scsi_properties[] = { +diff --git 
a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 779568ab5d..da8cb928d9 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -85,8 +85,9 @@ struct VirtIOSCSI { + + /* + * TMFs deferred to main loop BH. These fields are protected by +- * virtio_scsi_acquire(). ++ * tmf_bh_lock. + */ ++ QemuMutex tmf_bh_lock; + QEMUBH *tmf_bh; + QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch b/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch deleted file mode 100644 index e1eef6d..0000000 --- a/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 63e2339a6f38706c6fc5eb251426812520db6a6d Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:37 -0400 -Subject: [PATCH 03/56] vl.c: Create late backends before migration object -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [2/50] 7209bb94faa48650388be8fef08c77afd26517d8 (peterx/qemu-kvm) - -The migration object may want to check against different types of memory -when initialized. Delay the creation to be after late backends. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: David Hildenbrand -Signed-off-by: Juan Quintela -(cherry picked from commit cb9d8b8ce1aaf38f53295fc59ec1b8b7eb4338d2) -Signed-off-by: Peter Xu ---- - softmmu/vl.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/softmmu/vl.c b/softmmu/vl.c -index ad4173138d..a44b49430b 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -3592,14 +3592,19 @@ void qemu_init(int argc, char **argv) - machine_class->name, machine_class->deprecation_reason); - } - -+ /* -+ * Create backends before creating migration objects, so that it can -+ * check against compatibilities on the backend memories (e.g. postcopy -+ * over memory-backend-file objects). -+ */ -+ qemu_create_late_backends(); -+ - /* - * Note: creates a QOM object, must run only after global and - * compat properties have been set up. - */ - migration_object_init(); - -- qemu_create_late_backends(); -- - /* parse features once if machine provides default cpu_type */ - current_machine->cpu_type = machine_class->default_cpu_type; - if (cpu_option) { --- -2.39.1 - diff --git a/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch b/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch new file mode 100644 index 0000000..a8bf6ac --- /dev/null +++ b/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch @@ -0,0 +1,48 @@ +From 2932c8de175fadeed4bb7c1024724cbabc53f6d5 Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Mon, 19 Feb 2024 02:37:27 -0500 +Subject: [PATCH 6/6] x86: rhel 9.2.0 machine type compat fix + +RH-Author: Sebastian Ott +RH-MergeRequest: 342: Draft: x86: rhel 9.2.0 machine type compat fix (RHEL) +RH-Jira: RHEL-17068 +RH-Acked-by: Thomas Huth +RH-Commit: [23/23] 658dda965f34119de300eef26155f47b1b3fa7f1 + +Fix up the compatibility for 9.2.0 and older. 
+ +Signed-off-by: Sebastian Ott +--- + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 44038391fb..09d02cc91f 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1023,6 +1023,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->enforce_amd_1tb_hole = false; + /* From pc_i440fx_8_0_machine_options() */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_i440fx_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; + /* Introduced in QEMU 8.2 */ + pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 6387df97c8..c6967e1846 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -759,6 +759,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) + + /* From pc_q35_8_0_machine_options() */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_q35_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; + + compat_props_add(m->compat_props, hw_compat_rhel_9_4, + hw_compat_rhel_9_4_len); +-- +2.39.3 + diff --git a/SOURCES/qemu-ga.sysconfig b/SOURCES/qemu-ga.sysconfig index a78b428..736b471 100644 --- a/SOURCES/qemu-ga.sysconfig +++ b/SOURCES/qemu-ga.sysconfig @@ -1,11 +1,19 @@ # This is a systemd environment file, not a shell script. # It provides settings for "/lib/systemd/system/qemu-guest-agent.service". -# Comma-separated blocked RPCs to disable, or empty list to enable all. +# Guest agent command with comma-separated blocked RPCs to disable, +# or empty list to enable all. # # You can get the list of RPC commands using "qemu-ga --block-rpcs='?'". # There should be no spaces between commas and commands in the block list. 
-BLOCK_RPCS=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status +# FILTER_RPC_ARGS="--block-rpcs=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status" + +# Guest agent command with comma-separated allowed RPCs to enable, +# or empty list to disable all. +# +# You can get the list of RPC commands using "qemu-ga --allow-rpcs='?'". +# There should be no spaces between commas and commands in the allow list. +FILTER_RPC_ARGS="--allow-rpcs=guest-sync-delimited,guest-sync,guest-ping,guest-get-time,guest-set-time,guest-info,guest-shutdown,guest-fsfreeze-status,guest-fsfreeze-freeze,guest-fsfreeze-freeze-list,guest-fsfreeze-thaw,guest-fstrim,guest-suspend-disk,guest-suspend-ram,guest-suspend-hybrid,guest-network-get-interfaces,guest-get-vcpus,guest-set-vcpus,guest-get-disks,guest-get-fsinfo,guest-set-user-password,guest-get-memory-blocks,guest-set-memory-blocks,guest-get-memory-block-info,guest-get-host-name,guest-get-users,guest-get-timezone,guest-get-osinfo,guest-get-devices,guest-ssh-get-authorized-keys,guest-ssh-add-authorized-keys,guest-ssh-remove-authorized-keys,guest-get-diskstats,guest-get-cpustats" # Fsfreeze hook script specification. 
# diff --git a/SOURCES/qemu-guest-agent.service b/SOURCES/qemu-guest-agent.service index 244da02..f74ebd0 100644 --- a/SOURCES/qemu-guest-agent.service +++ b/SOURCES/qemu-guest-agent.service @@ -10,7 +10,7 @@ EnvironmentFile=/etc/sysconfig/qemu-ga ExecStart=/usr/bin/qemu-ga \ --method=virtio-serial \ --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ - --block-rpcs=${BLOCK_RPCS} \ + ${FILTER_RPC_ARGS} \ -F${FSFREEZE_HOOK_PATHNAME} Restart=always RestartSec=0 diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 88a306c..fbe4f67 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -58,7 +58,7 @@ %global tools_only 1 %endif -%ifnarch %{ix86} x86_64 +%ifnarch %{ix86} x86_64 aarch64 %global have_usbredir 0 %endif @@ -148,8 +148,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 8.0.0 -Release: 16%{?rcrel}%{?dist}%{?cc_suffix}.3 +Version: 8.2.0 +Release: 11%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -186,384 +186,430 @@ Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0015: 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0016: 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -Patch0017: 0017-Add-RHEL-9.2.0-compat-structure.patch -Patch0018: 0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch -Patch0019: 0019-Disable-unwanted-new-devices.patch -# For bz#2087047 - Disk detach is unsuccessful while the guest is still booting -Patch20: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch -# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs -Patch21: kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch -# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs -Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch23: kvm-migration-Handle-block-device-inactivation-failures-.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch24: kvm-migration-Minor-control-flow-simplification.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch25: kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch26: kvm-util-mmap-alloc-qemu_fd_getfs.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch27: kvm-vl.c-Create-late-backends-before-migration-object.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch28: 
kvm-migration-postcopy-Detect-file-system-on-dest-host.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch29: kvm-migration-mark-mixed-functions-that-can-suspend.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch30: kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch31: kvm-migration-remove-extra-whitespace-character-for-code.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch32: kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch33: kvm-migration-Update-atomic-stats-out-of-the-mutex.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch34: kvm-migration-Make-multifd_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch35: kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch36: kvm-migration-Make-precopy_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch37: kvm-migration-Make-downtime_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch38: kvm-migration-Make-dirty_sync_count-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch39: kvm-migration-Make-postcopy_requests-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch40: kvm-migration-Rename-duplicate-to-zero_pages.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch41: kvm-migration-Rename-normal-to-normal_pages.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared 
memory -Patch42: kvm-migration-rename-enabled_capabilities-to-capabilitie.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch43: kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch44: kvm-migration-move-migration_global_dump-to-migration-hm.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch45: kvm-spice-move-client_migrate_info-command-to-ui.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch46: kvm-migration-Create-migrate_cap_set.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch47: kvm-migration-Create-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch48: kvm-migration-Move-migrate_colo_enabled-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch49: kvm-migration-Move-migrate_use_compression-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch50: kvm-migration-Move-migrate_use_events-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch51: kvm-migration-Move-migrate_use_multifd-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch52: kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch53: kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch54: kvm-migration-Move-migrate_use_block-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch55: kvm-migration-Move-migrate_use_return-to-options.c.patch -# For bz#2057267 - 
Migration with postcopy fail when vm set with shared memory -Patch56: kvm-migration-Create-migrate_rdma_pin_all-function.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch57: kvm-migration-Move-migrate_caps_check-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch58: kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch59: kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch60: kvm-migration-Move-migrate_cap_set-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch61: kvm-migration-Move-parameters-functions-to-option.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch62: kvm-migration-Use-migrate_max_postcopy_bandwidth.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch63: kvm-migration-Move-migrate_use_block_incremental-to-opti.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch64: kvm-migration-Create-migrate_throttle_trigger_threshold.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch65: kvm-migration-Create-migrate_checkpoint_delay.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch66: kvm-migration-Create-migrate_max_cpu_throttle.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch67: kvm-migration-Move-migrate_announce_params-to-option.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch68: kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch69: 
kvm-migration-Create-migrate_cpu_throttle_increment-func.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch70: kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch71: kvm-migration-Move-migrate_postcopy-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch72: kvm-migration-Create-migrate_max_bandwidth-function.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch73: kvm-migration-Move-migrate_use_tls-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch74: kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch75: kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch76: kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch77: kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch78: kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch79: kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch80: kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch81: kvm-graph-lock-Disable-locking-for-now.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch82: kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch -# For bz#2186725 - 
Qemu hang when commit during fio running(iothread enable) -Patch83: kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch84: kvm-memory-prevent-dma-reentracy-issues.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch85: kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch86: kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch87: kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch88: kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch89: kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch90: kvm-raven-disable-reentrancy-detection-for-iomem.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch91: kvm-apic-disable-reentrancy-detection-for-apic-msi.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch92: kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch93: 
kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch94: kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch95: kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch96: kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch -# For bz#2189423 - Failed to migrate VM from rhel 9.3 to rhel 9.2 -Patch97: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch -# For bz#2196289 - Fix number of ready channels on multifd -Patch98: kvm-multifd-Fix-the-number-of-channels-ready.patch -# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part -Patch99: kvm-util-async-teardown-wire-up-query-command-line-optio.patch -# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part -Patch100: kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch -# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode -Patch101: kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch -# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode -Patch102: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch -# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers -Patch103: kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch -# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers -Patch104: kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular 
CPU-to-NUMA association -Patch105: kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association -Patch106: kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association -Patch107: kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch -# For RHEL-330 - [virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed -Patch108: kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch -# For bz#2218644 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) -Patch109: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch -# For bz#2128929 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest -Patch110: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch -# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" -# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package -Patch111: kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch -# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" -# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package -Patch112: kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch113: kvm-vfio-pci-add-support-for-VF-token.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch114: 
kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch115: kvm-vfio-pci-Static-Resizable-BAR-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch116: kvm-vfio-pci-Fix-a-use-after-free-issue.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch117: kvm-util-vfio-helpers-Use-g_file_read_link.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch118: kvm-migration-Make-all-functions-check-have-the-same-for.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch119: kvm-migration-Move-migration_properties-to-options.c.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch120: kvm-migration-Add-switchover-ack-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch121: kvm-migration-Implement-switchover-ack-logic.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch122: kvm-migration-Enable-switchover-ack-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch123: kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch124: kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch125: kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch126: kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch127: kvm-vfio-Implement-a-common-device-info-helper.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch128: kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch129: kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch130: kvm-vfio-migration-Reset-bytes_transferred-properly.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch131: kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch -# For 
bz#2192818 - [VFIO LM] Live migration -Patch132: kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch133: kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch134: kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch135: kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch136: kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch137: kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch138: kvm-vfio-migration-Remove-print-of-Migration-disabled.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch139: kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch140: kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch141: kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch142: kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch143: kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch144: kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch -# For bz#2222579 - PNG screendump doesn't save screen correctly -Patch145: kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch -# For bz#2213317 - Enable libblkio-based block drivers in QEMU -Patch146: kvm-block-blkio-fix-module_block.py-parsing.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug 
it repeatly -Patch147: kvm-scsi-fetch-unit-attention-when-creating-the-request.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly -Patch148: kvm-scsi-cleanup-scsi_clear_unit_attention.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly -Patch149: kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch -# For RHEL-794 - Backport s390x fixes from QEMU 8.1 -Patch150: kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch151: kvm-multifd-Create-property-multifd-flush-after-each-sec.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch152: kvm-multifd-Protect-multifd_send_sync_main-calls.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch153: kvm-multifd-Only-flush-once-each-full-round-of-memory.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch154: kvm-net-socket-prepare-to-cleanup-net_init_socket.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch155: kvm-net-socket-move-fd-type-checking-to-its-own-function.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch156: kvm-net-socket-remove-net_init_socket.patch -# For bz#2215819 - Migration test failed while guest with PCIe devices -Patch157: kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch158: kvm-util-iov-Make-qiov_slice-public.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch159: 
kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch160: kvm-util-iov-Remove-qemu_iovec_init_extended.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch161: kvm-iotests-iov-padding-New-test.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch162: kvm-block-Fix-pad_request-s-request-restriction.patch -# For RHEL-573 - [mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature" -Patch163: kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch -# For bz#2040509 - [RFE]:Add support for changing "tx_queue_size" to a setable value -Patch164: kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch -# For bz#2223691 - [machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2 -Patch165: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch -# For bz#2141965 - [TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region -Patch166: kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch167: kvm-block-blkio-enable-the-completion-eventfd.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch168: kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver 
virtio-blk-vhost-vdpa -Patch169: kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch170: kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch171: kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch172: kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch173: kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch174: kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch175: kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch176: kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch177: kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch178: kvm-i386-sev-Update-checks-and-information-related-to-re.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch179: kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch180: kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch -# For 
bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch181: kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch182: kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch183: kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch184: kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch185: kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch186: kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch187: kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch188: kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch189: kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch190: kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch191: kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch192: kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch193: kvm-vdpa-remove-net-cvq-migration-blocker.patch -# For bz#2216504 - CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.3.0] -Patch194: kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch -# For bz#2229868 - [vfio migration]Disable 
postcopy for VM with migratable vfio device -Patch195: kvm-migration-Add-migration-prefix-to-functions-in-targe.patch -# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device -Patch196: kvm-migration-Move-more-initializations-to-migrate_init.patch -# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device -Patch197: kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch -# For bz#2229868 - [vfio migration]Disable postcopy for VM with migratable vfio device -Patch198: kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch -# For RHEL-4453 - qemu-kvm crashed when migrating guest with failover vf [rhel-9.3.0.z] -Patch199: kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch -# For RHEL-16997 - RHEL9.3 - KVM : Secure execution guest remains in "paused" state, post "virsh dump" failure (qemu-kvm) [rhel-9.3.0.z] -Patch200: kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch -# For RHEL-16997 - RHEL9.3 - KVM : Secure execution guest remains in "paused" state, post "virsh dump" failure (qemu-kvm) [rhel-9.3.0.z] -Patch201: kvm-dump-Add-arch-cleanup-function.patch -# For RHEL-16997 - RHEL9.3 - KVM : Secure execution guest remains in "paused" state, post "virsh dump" failure (qemu-kvm) [rhel-9.3.0.z] -Patch202: kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch +Patch0016: 0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch +# For RHEL-17168 - Introduce virt-rhel9.4.0 arm-virt machine type [aarch64] +Patch17: kvm-hw-arm-virt-Fix-compats.patch +# For RHEL-19738 - Enable properties allowing to disable high memory regions +Patch18: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch19: kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport 
QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch20: kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch21: kvm-vfio-container-Switch-to-dma_map-unmap-API.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch22: kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch23: kvm-vfio-common-Move-giommu_list-in-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch24: kvm-vfio-container-Move-space-field-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch25: kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch26: kvm-vfio-container-Move-per-container-device-list-in-bas.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch27: kvm-vfio-container-Convert-functions-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch28: kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU 
IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch29: kvm-vfio-container-Move-vrdl_list-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch30: kvm-vfio-container-Move-listener-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch31: kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch32: kvm-vfio-container-Move-iova_ranges-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch33: kvm-vfio-container-Implement-attach-detach_device.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch34: kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch35: kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch36: kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch37: kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD 
Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch38: kvm-backends-iommufd-Introduce-the-iommufd-object.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch39: kvm-util-char_dev-Add-open_cdev.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch40: kvm-vfio-common-return-early-if-space-isn-t-empty.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch41: kvm-vfio-iommufd-Implement-the-iommufd-backend.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch42: kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch43: kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch44: kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch45: kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch46: kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request 
backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch47: kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch48: kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch49: kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch50: kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch51: kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch52: kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch53: kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch54: kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch55: kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For 
RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch56: kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch57: kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch58: kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch59: kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch60: kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch61: kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch62: kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch63: kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch64: kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - 
Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch65: kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch66: kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch67: kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch68: kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch69: kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch70: kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch71: kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch72: kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch73: kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request 
backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch74: kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch75: kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch76: kvm-vfio-container-Replace-basename-with-g_path_get_base.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch77: kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch78: kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch79: kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch80: kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch81: kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch82: kvm-backends-iommufd-Remove-mutex.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch83: kvm-Compile-IOMMUFD-object-on-aarch64.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch84: kvm-Compile-IOMMUFD-on-s390x.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch85: kvm-Compile-IOMMUFD-on-x86_64.patch +# For RHEL-18212 - [RHEL9][Secure-execution][s390x] The error message is not clear when boot up a SE guest with wrong encryption +Patch86: kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch87: kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch88: kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch89: kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch90: kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch91: kvm-virtio-blk-add-lock-to-protect-s-rq.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch92: kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch93: kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch94: 
kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch95: kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch96: kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch97: kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch98: kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch99: kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch100: kvm-tests-remove-aio_context_acquire-tests.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch101: kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch102: kvm-graph-lock-remove-AioContext-locking.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch103: kvm-block-remove-AioContext-locking.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch104: kvm-block-remove-bdrv_co_lock.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch105: kvm-scsi-remove-AioContext-locking.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch106: 
kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch107: kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch108: kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch109: kvm-scsi-remove-outdated-AioContext-lock-comment.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch110: kvm-job-remove-outdated-AioContext-locking-comments.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch111: kvm-block-remove-outdated-AioContext-locking-comments.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch112: kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch +# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 +Patch113: kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch +# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 +Patch114: kvm-s390x-pci-refresh-fh-before-disabling-aif.patch +# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 +Patch115: kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch +# For RHEL-21570 - Critical performance degradation for input devices in virtio vnc session +Patch116: kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch +# For RHEL-7565 - qemu crashed when migrate guest with blob resources enabled +Patch117: kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch +# For RHEL-21293 - [emulated igb] Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument +Patch118: 
kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch +# For RHEL-20341 - memory-device size alignment check invalid in QEMU 8.2 +Patch119: kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch +# For RHEL-20341 - memory-device size alignment check invalid in QEMU 8.2 +Patch120: kvm-memory-device-reintroduce-memory-region-size-check.patch +# For RHEL-24593 - qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk +Patch121: kvm-block-backend-Allow-concurrent-context-changes.patch +# For RHEL-24593 - qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk +Patch122: kvm-scsi-Await-request-purging.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch123: kvm-string-output-visitor-show-structs-as-omitted.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch124: kvm-string-output-visitor-Fix-pseudo-struct-handling.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch125: kvm-qdev-properties-alias-all-object-class-properties.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch126: kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch127: kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch128: kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch129: kvm-iotests-add-filter_qmp_generated_node_ids.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch130: kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch131: kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch132: kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch133: kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch134: kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch135: kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch136: kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch137: kvm-virtio-blk-always-set-ioeventfd-during-startup.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch138: kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch139: kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch +# For RHEL-24045 - QEMU: default-enable dynamically using multiple memslots for virtio-mem +Patch140: kvm-virtio-mem-default-enable-dynamic-memslots.patch +# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled +Patch141: kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch +# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled +Patch142: kvm-virtio-Re-enable-notifications-after-drain.patch +# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled +Patch143: kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch +# For RHEL-15394 - virtio-blk: qemu hang on "no response on QMP query-status" when write data to disk without enough space +Patch144: 
kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch +# For RHEL-24988 - Mark virt-rhel9.{0,2}.0 machine types as deprecated +Patch145: kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch +# For RHEL-17068 - Check/fix machine type compatibility for qemu-kvm 8.2.0 [x86_64] +Patch146: kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch +# For RHEL-26049 - When max vcpu is greater than or equal to 246, qemu unable to init event notifier +Patch147: kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch +# For RHEL-24614 - [RHEL9][chardev][s390x] qemu hit core dump while using TLS server from host to guest +Patch148: kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch +# For RHEL-19629 - CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer dereference in qemu_clipboard_request() [rhel-9] +Patch149: kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch +# For RHEL-19629 - CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer dereference in qemu_clipboard_request() [rhel-9] +Patch150: kvm-ui-clipboard-add-asserts-for-update-and-request.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch151: kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch152: kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch153: kvm-Implement-SMBIOS-type-9-v2.6.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch154: kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch155: kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch156: kvm-smbios-get-rid-of-smbios_legacy-global.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does 
not provide proper computer information +Patch157: kvm-smbios-avoid-mangling-user-provided-tables.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch158: kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch159: kvm-smbios-add-smbios_add_usr_blob_size-helper.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch160: kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch161: kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch162: kvm-smbios-handle-errors-consistently.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch163: kvm-smbios-get-rid-of-global-smbios_ep_type.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch164: kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch165: kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch166: kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch167: kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch168: kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. 
+Patch169: kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. +Patch170: kvm-nbd-server-Fix-race-in-draining-the-export.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. +Patch171: kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch172: kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch173: kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch174: kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch175: kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch +# For RHEL-28947 - Qemu crashing with "failed to set up stack guard page: Cannot allocate memory" +Patch176: kvm-coroutine-cap-per-thread-local-pool-size.patch +# For RHEL-28947 - Qemu crashing with "failed to set up stack guard page: Cannot allocate memory" +Patch177: kvm-coroutine-reserve-5-000-mappings.patch %if %{have_clang} BuildRequires: clang @@ -576,6 +622,7 @@ BuildRequires: gcc BuildRequires: meson >= %{meson_version} BuildRequires: ninja-build BuildRequires: zlib-devel +BuildRequires: libzstd-devel BuildRequires: glib2-devel BuildRequires: gnutls-devel BuildRequires: cyrus-sasl-devel @@ -729,8 +776,8 @@ Summary: %{name} documentation %package -n qemu-pr-helper Summary: qemu-pr-helper utility for %{name} %description -n qemu-pr-helper -This package provides the qemu-pr-helper utility that is required for certain -SCSI features. 
+This package provides the qemu-pr-helper utility that is required for certain +SCSI features. %package -n qemu-img @@ -869,7 +916,7 @@ Summary: QEMU usbredir support Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} Requires: usbredir >= 0.7.1 Provides: %{name}-hw-usbredir -Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} +Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} %description device-usb-redirect This package provides usbredir support. @@ -933,6 +980,7 @@ ulimit -n 10240 --disable-debug-tcg \\\ --disable-dmg \\\ --disable-docs \\\ + --disable-download \\\ --disable-dsound \\\ --disable-fdt \\\ --disable-fuse \\\ @@ -945,7 +993,6 @@ ulimit -n 10240 --disable-gtk \\\ --disable-guest-agent \\\ --disable-guest-agent-msi \\\ - --disable-hax \\\ --disable-hvf \\\ --disable-iconv \\\ --disable-jack \\\ @@ -982,6 +1029,7 @@ ulimit -n 10240 --disable-pa \\\ --disable-parallels \\\ --disable-pie \\\ + --disable-plugins \\\ --disable-pvrdma \\\ --disable-qcow1 \\\ --disable-qed \\\ @@ -1036,7 +1084,6 @@ ulimit -n 10240 --disable-xen-pci-passthrough \\\ --disable-xkbcommon \\\ --disable-zstd \\\ - --with-git-submodules=ignore \\\ --without-default-devices @@ -1057,10 +1104,8 @@ run_configure() { --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ - --meson="%{__meson}" \ --enable-trace-backends=dtrace \ --with-coroutine=ucontext \ - --with-git=git \ --tls-priority=@QEMU,SYSTEM \ %{disable_everything} \ --with-devices-%{kvm_target}=%{kvm_target}-rh-devices \ @@ -1151,6 +1196,7 @@ run_configure \ --enable-werror \ %endif --enable-xkbcommon \ + --enable-zstd \ %if %{have_safe_stack} --enable-safe-stack \ %endif @@ -1301,7 +1347,7 @@ mkdir -p %{buildroot}%{_datadir}/%{name}/tracetool/format install -m 0644 -t %{buildroot}%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py mkdir -p %{buildroot}%{qemudocdir} -install -p -m 0644 -t %{buildroot}%{qemudocdir} README.rst 
README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +install -p -m 0644 -t %{buildroot}%{qemudocdir} README.rst README.systemtap COPYING COPYING.LIB LICENSE # Rename man page pushd %{buildroot}%{_mandir}/man1/ @@ -1625,30 +1671,308 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog -* Fri Dec 05 2023 Miroslav Rezanina - 8.0.0-16.el9_3.3 -- kvm-target-s390x-dump-Remove-unneeded-dump-info-function.patch [RHEL-16997] -- kvm-dump-Add-arch-cleanup-function.patch [RHEL-16997] -- kvm-target-s390x-arch_dump-Add-arch-cleanup-function-for.patch [RHEL-16997] -- Resolves: RHEL-16997 - (RHEL9.3 - KVM : Secure execution guest remains in "paused" state, post "virsh dump" failure (qemu-kvm) [rhel-9.3.0.z]) - -* Mon Oct 09 2023 Miroslav Rezanina - 8.0.0-16.el9_3.1 -- kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch [RHEL-4453] -- Resolves: RHEL-4453 - (qemu-kvm crashed when migrating guest with failover vf [rhel-9.3.0.z]) - -* Mon Sep 18 2023 Miroslav Rezanina - 8.0.0-16.el9_3 -- kvm-migration-Add-migration-prefix-to-functions-in-targe.patch [bz#2229868] -- kvm-migration-Move-more-initializations-to-migrate_init.patch [bz#2229868] -- kvm-migration-Add-.save_prepare-handler-to-struct-SaveVM.patch [bz#2229868] -- kvm-vfio-migration-Block-VFIO-migration-with-postcopy-mi.patch [bz#2229868] -- Resolves: bz#2229868 - ([vfio migration]Disable postcopy for VM with migratable vfio device) - -* Fri Sep 15 2023 Miroslav Rezanina - 8.0.0-15.el9_3 -- kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch [bz#2216504] -- Resolves: bz#2216504 - (CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.3.0]) +* Tue Mar 26 2024 Miroslav Rezanina - 8.2.0-11 +- kvm-coroutine-cap-per-thread-local-pool-size.patch [RHEL-28947] +- kvm-coroutine-reserve-5-000-mappings.patch [RHEL-28947] +- Resolves: RHEL-28947 + (Qemu crashing with "failed to set up stack 
guard page: Cannot allocate memory") + +* Thu Mar 21 2024 Miroslav Rezanina - 8.2.0-10 +- kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch [RHEL-24614] +- kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch [RHEL-24614] +- kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch [RHEL-24614] +- Resolves: RHEL-24614 + ([RHEL9][chardev] qemu hit core dump while using TLS server from host to guest) + +* Wed Mar 20 2024 Miroslav Rezanina - 8.2.0-9 +- kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch [RHEL-28125] +- kvm-nbd-server-Fix-race-in-draining-the-export.patch [RHEL-28125] +- kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch [RHEL-28125] +- kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch [RHEL-21705] +- Resolves: RHEL-28125 + (RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete.) +- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + +* Mon Mar 18 2024 Miroslav Rezanina - 8.2.0-8 +- kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch [RHEL-19629] +- kvm-ui-clipboard-add-asserts-for-update-and-request.patch [RHEL-19629] +- kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch [RHEL-21705] +- kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch [RHEL-21705] +- kvm-Implement-SMBIOS-type-9-v2.6.patch [RHEL-21705] +- kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch [RHEL-21705] +- kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch [RHEL-21705] +- kvm-smbios-get-rid-of-smbios_legacy-global.patch [RHEL-21705] +- kvm-smbios-avoid-mangling-user-provided-tables.patch [RHEL-21705] +- kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch [RHEL-21705] +- kvm-smbios-add-smbios_add_usr_blob_size-helper.patch [RHEL-21705] +- kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch [RHEL-21705] +- kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch [RHEL-21705] +- 
kvm-smbios-handle-errors-consistently.patch [RHEL-21705] +- kvm-smbios-get-rid-of-global-smbios_ep_type.patch [RHEL-21705] +- kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch [RHEL-21705] +- kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch [RHEL-21705] +- kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch [RHEL-21705] +- kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch [RHEL-21705] +- kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch [RHEL-21705] +- Resolves: RHEL-19629 + (CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer dereference in qemu_clipboard_request() [rhel-9]) +- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + +* Fri Mar 08 2024 Miroslav Rezanina - 8.2.0-7 +- kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch [RHEL-26049] +- kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch [RHEL-24614] +- Resolves: RHEL-26049 + (When max vcpu is greater than or equal to 246, qemu unable to init event notifier) +- Resolves: RHEL-24614 + ([RHEL9][chardev][s390x] qemu hit core dump while using TLS server from host to guest) + +* Mon Feb 19 2024 Miroslav Rezanina - 8.2.0-6 +- kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch [RHEL-3934] +- kvm-virtio-Re-enable-notifications-after-drain.patch [RHEL-3934] +- kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch [RHEL-3934] +- kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch [RHEL-15394] +- kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch [RHEL-24988] +- Resolves: RHEL-3934 + ([qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled ) +- Resolves: RHEL-15394 + (virtio-blk: qemu hang on "no response on QMP query-status" when write data to disk without enough space) +- Resolves: RHEL-24988 + (Mark virt-rhel9.{0,2}.0 machine types as deprecated) + +* Mon Feb 12 2024 Miroslav Rezanina - 8.2.0-5 +- 
kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch [RHEL-20341] +- kvm-memory-device-reintroduce-memory-region-size-check.patch [RHEL-20341] +- kvm-block-backend-Allow-concurrent-context-changes.patch [RHEL-24593] +- kvm-scsi-Await-request-purging.patch [RHEL-24593] +- kvm-string-output-visitor-show-structs-as-omitted.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-string-output-visitor-Fix-pseudo-struct-handling.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-qdev-properties-alias-all-object-class-properties.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-add-filter_qmp_generated_node_ids.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-always-set-ioeventfd-during-startup.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-mem-default-enable-dynamic-memslots.patch [RHEL-24045] +- Resolves: 
RHEL-20341 + (memory-device size alignment check invalid in QEMU 8.2) +- Resolves: RHEL-24593 + (qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk) +- Resolves: RHEL-17369 + ([nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed.) +- Resolves: RHEL-20764 + ([qemu-kvm] Enable qemu multiqueue block layer support) +- Resolves: RHEL-7356 + ([qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9]) +- Resolves: RHEL-24045 + (QEMU: default-enable dynamically using multiple memslots for virtio-mem) + +* Tue Jan 30 2024 Miroslav Rezanina - 8.2.0-4 +- kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch [RHEL-21293] +- Resolves: RHEL-21293 + ([emulated igb] Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument) + +* Wed Jan 24 2024 Miroslav Rezanina - 8.2.0-3 +- kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch [RHEL-19738] +- kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Switch-to-dma_map-unmap-API.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-Move-giommu_list-in-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-space-field-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-per-container-device-list-in-bas.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Convert-functions-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch [RHEL-19302 RHEL-21057] +- 
kvm-vfio-container-Move-vrdl_list-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-listener-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-iova_ranges-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Implement-attach-detach_device.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Introduce-the-iommufd-object.patch [RHEL-19302 RHEL-21057] +- kvm-util-char_dev-Add-open_cdev.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-return-early-if-space-isn-t-empty.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Implement-the-iommufd-backend.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch [RHEL-19302 RHEL-21057] +- 
kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch [RHEL-19302 RHEL-21057] +- kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch [RHEL-19302 RHEL-21057] +- kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch [RHEL-19302 RHEL-21057] +- kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch [RHEL-19302 RHEL-21057] +- kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch [RHEL-19302 RHEL-21057] +- kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch [RHEL-19302 
RHEL-21057] +- kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Replace-basename-with-g_path_get_base.patch [RHEL-19302 RHEL-21057] +- kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Remove-mutex.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-object-on-aarch64.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-on-s390x.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-on-x86_64.patch [RHEL-19302 RHEL-21057] +- kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch [RHEL-18212] +- kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch [RHEL-15965] +- kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch [RHEL-15965] +- kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch [RHEL-15965] +- kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch [RHEL-15965] +- kvm-virtio-blk-add-lock-to-protect-s-rq.patch [RHEL-15965] +- kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch [RHEL-15965] +- kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch [RHEL-15965] +- kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch [RHEL-15965] +- kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch [RHEL-15965] +- kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch [RHEL-15965] +- kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch [RHEL-15965] +- kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch [RHEL-15965] +- kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch [RHEL-15965] +- 
kvm-tests-remove-aio_context_acquire-tests.patch [RHEL-15965] +- kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch [RHEL-15965] +- kvm-graph-lock-remove-AioContext-locking.patch [RHEL-15965] +- kvm-block-remove-AioContext-locking.patch [RHEL-15965] +- kvm-block-remove-bdrv_co_lock.patch [RHEL-15965] +- kvm-scsi-remove-AioContext-locking.patch [RHEL-15965] +- kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch [RHEL-15965] +- kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch [RHEL-15965] +- kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch [RHEL-15965] +- kvm-scsi-remove-outdated-AioContext-lock-comment.patch [RHEL-15965] +- kvm-job-remove-outdated-AioContext-locking-comments.patch [RHEL-15965] +- kvm-block-remove-outdated-AioContext-locking-comments.patch [RHEL-15965] +- kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch [RHEL-15965] +- kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch [RHEL-21169] +- kvm-s390x-pci-refresh-fh-before-disabling-aif.patch [RHEL-21169] +- kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch [RHEL-21169] +- kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch [RHEL-21570] +- kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch [RHEL-7565] +- kvm-spec-Enable-zstd.patch [RHEL-7361] +- Resolves: RHEL-19738 + (Enable properties allowing to disable high memory regions) +- Resolves: RHEL-19302 + (NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend) +- Resolves: RHEL-21057 + (Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6) +- Resolves: RHEL-18212 + ([RHEL9][Secure-execution][s390x] The error message is not clear when boot up a SE guest with wrong encryption) +- Resolves: RHEL-15965 + ( [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize)) +- Resolves: RHEL-21169 + ([s390x] VM fails to start with ISM passed through QEMU 8.2) +- Resolves: RHEL-21570 + (Critical performance degradation for input devices in virtio vnc session) +- 
Resolves: RHEL-7565 + (qemu crashed when migrate guest with blob resources enabled) +- Resolves: RHEL-7361 + ([qemu-kvm] Enable zstd support for qcow2 files) + +* Mon Jan 08 2024 Miroslav Rezanina - 8.2.0-2 +- kvm-hw-arm-virt-Fix-compats.patch [RHEL-17168] +- Resolves: RHEL-17168 + (Introduce virt-rhel9.4.0 arm-virt machine type [aarch64]) + +* Tue Jan 02 2024 Miroslav Rezanina - 8.2.0-1 +- Rebase to QEMU 8.2.0 [RHEL-14111] +- Fix machine type compatibility [RHEL-17067 RHEL-17068] +- Add 9.4.0 machine type [RHEL-17168 RHEL-19117 RHEL-19119] +- Resolves: RHEL-14111 + (Rebase qemu-kvm to QEMU 8.2.0) +- Resolves: RHEL-17067 + (Check/fix machine type compatibility for qemu-kvm 8.2.0 [s390x]) +- Resolves: RHEL-17068 + (Check/fix machine type compatibility for qemu-kvm 8.2.0 [x86_64]) +- Resolves: RHEL-17168 + (Introduce virt-rhel9.4.0 arm-virt machine type [aarch64]) +- Resolves: RHEL-19117 + (Introduce virt-rhel9.4.0 arm-virt machine type [x86_64]) +- Resolves: RHEL-19119 + (Introduce virt-rhel9.4.0 arm-virt machine type [s390x]) + +* Thu Nov 30 2023 Miroslav Rezanina - 8.1.0-5 +- kvm-Preparation-for-using-allow-rpcs-list-in-guest-agent.patch [RHEL-955] +- kvm-Use-allow-rpcs-instead-of-block-rpcs-in-guest-agent..patch [RHEL-955] +- Resolves: RHEL-955 + (Use allow-rpcs instead of block-rpcs in guest-agent.service) + +* Mon Nov 13 2023 Miroslav Rezanina - 8.1.0-4 +- kvm-hw-scsi-scsi-disk-Disallow-block-sizes-smaller-than-.patch [RHEL-2828] +- kvm-Enable-igb-on-x86_64.patch [RHEL-1308] +- kvm-host-include-generic-host-atomic128-Fix-compilation-.patch [RHEL-12991] +- kvm-Enable-qemu-kvm-device-usb-redirec-for-aarch64.patch [RHEL-7561] +- Resolves: RHEL-2828 + (CVE-2023-42467 qemu-kvm: qemu: denial of service due to division by zero [rhel-9]) +- Resolves: RHEL-1308 + ([RFE] iGB: Add an emulated SR-IOV network card) +- Resolves: RHEL-12991 + (qemu-kvm fails to build on s390x with clang-17) +- Resolves: RHEL-7561 + (Missing the rpm package qemu-kvm-device-usb-redirect on 
Arm64 platform) + +* Mon Oct 16 2023 Miroslav Rezanina - 8.1.0-3 +- kvm-migration-Fix-race-that-dest-preempt-thread-close-to.patch [RHEL-11219] +- kvm-migration-Fix-possible-race-when-setting-rp_state.er.patch [RHEL-11219] +- kvm-migration-Fix-possible-races-when-shutting-down-the-.patch [RHEL-11219] +- kvm-migration-Fix-possible-race-when-shutting-down-to_ds.patch [RHEL-11219] +- kvm-migration-Remove-redundant-cleanup-of-postcopy_qemuf.patch [RHEL-11219] +- kvm-migration-Consolidate-return-path-closing-code.patch [RHEL-11219] +- kvm-migration-Replace-the-return-path-retry-logic.patch [RHEL-11219] +- kvm-migration-Move-return-path-cleanup-to-main-migration.patch [RHEL-11219] +- kvm-file-posix-Clear-bs-bl.zoned-on-error.patch [RHEL-7360] +- kvm-file-posix-Check-bs-bl.zoned-for-zone-info.patch [RHEL-7360] +- kvm-file-posix-Fix-zone-update-in-I-O-error-path.patch [RHEL-7360] +- kvm-file-posix-Simplify-raw_co_prw-s-out-zone-code.patch [RHEL-7360] +- kvm-tests-file-io-error-New-test.patch [RHEL-7360] +- Resolves: RHEL-11219 + (migration tests failing for RHEL 9.4 sometimes) +- Resolves: RHEL-7360 + (Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk) + +* Mon Oct 02 2023 Miroslav Rezanina - 8.1.0-2 +- kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch [RHEL-832] +- Resolves: RHEL-832 + (qemu-kvm crashed when migrating guest with failover vf) + +* Mon Sep 04 2023 Miroslav Rezanina - 8.1.0-1 +- Rebase to QEMU 8.1 [RHEL-870] +- Resolves: RHEL-870 + (Rebase qemu-kvm to QEMU 8.1.0) * Thu Aug 24 2023 Miroslav Rezanina - 8.0.0-13 - kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch [RHEL-923] From d289d6cae72b1e2722e548b42dac742edcadae32 Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Thu, 13 Jun 2024 03:36:19 +0300 Subject: [PATCH 5/5] import qemu-kvm-8.2.0-11.el9_4.2 --- ...-kvm_irqchip_add_msi_route-in-case-o.patch | 76 ++++++++ ...tio-pci-fix-use-of-a-released-vector.patch | 162 ++++++++++++++++++ SPECS/qemu-kvm.spec 
| 16 +- 3 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch create mode 100644 SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch diff --git a/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch b/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch new file mode 100644 index 0000000..5116b96 --- /dev/null +++ b/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch @@ -0,0 +1,76 @@ +From 8a8fa4ab4dc05502550ca207926cd0c93a3341ea Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 8 Apr 2024 12:43:49 +0200 +Subject: [PATCH] kvm: error out of kvm_irqchip_add_msi_route() in case of full + route table + +RH-Author: Igor Mammedov +RH-MergeRequest: 374: kvm: error out of kvm_irqchip_add_msi_route() in case of full route table +RH-Jira: RHEL-32990 +RH-Acked-by: Ani Sinha +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] df31f2d0cafe10a1ac22a2bebb85dc17c1e891e0 + +RH-Jira: RHEL-32990 + +subj is calling kvm_add_routing_entry() which simply extends + KVMState::irq_routes::entries[] +but doesn't check if number of routes goes beyond limit the kernel +is willing to accept. Which later leads toi the assert + + qemu-kvm: ../accel/kvm/kvm-all.c:1833: kvm_irqchip_commit_routes: Assertion `ret == 0' failed + +typically it happens during guest boot for large enough guest + +Reproduced with: + ./qemu --enable-kvm -m 8G -smp 64 -machine pc \ + `for b in {1..2}; do echo -n "-device pci-bridge,id=pci$b,chassis_nr=$b "; + for i in {0..31}; do touch /tmp/vblk$b$i; + echo -n "-drive file=/tmp/vblk$b$i,if=none,id=drive$b$i,format=raw + -device virtio-blk-pci,drive=drive$b$i,bus=pci$b "; + done; done` + +While crash at boot time is bad, the same might happen at hotplug time +which is unacceptable. +So instead calling kvm_add_routing_entry() unconditionally, check first +that number of routes won't exceed KVM_CAP_IRQ_ROUTING. 
This way virtio +device insteads killin qemu, will gracefully fail to initialize device +as expected with following warnings on console: + virtio-blk failed to set guest notifier (-28), ensure -accel kvm is set. + virtio_bus_start_ioeventfd: failed. Fallback to userspace (slower). + +Signed-off-by: Igor Mammedov +--- + accel/kvm/kvm-all.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index e39a810a4e..f1a4564cbd 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2000,12 +2000,17 @@ int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) + return -EINVAL; + } + +- trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A", +- vector, virq); ++ if (s->irq_routes->nr < s->gsi_count) { ++ trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A", ++ vector, virq); + +- kvm_add_routing_entry(s, &kroute); +- kvm_arch_add_msi_route_post(&kroute, vector, dev); +- c->changes++; ++ kvm_add_routing_entry(s, &kroute); ++ kvm_arch_add_msi_route_post(&kroute, vector, dev); ++ c->changes++; ++ } else { ++ kvm_irqchip_release_virq(s, virq); ++ return -ENOSPC; ++ } + + return virq; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch b/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch new file mode 100644 index 0000000..4128907 --- /dev/null +++ b/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch @@ -0,0 +1,162 @@ +From ccd8ffa5cd7f9bcfddeda7a9fa1ad86d4bad870e Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Fri, 12 Apr 2024 14:26:55 +0800 +Subject: [PATCH] virtio-pci: fix use of a released vector +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 366: virtio-pci: fix use of a released vector +RH-Jira: RHEL-32837 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Jason Wang +RH-Commit: [1/1] 
edd26ffb8727635310aed42e42925afe87df2287 + +During the booting process of the non-standard image, the behavior of the +called function in qemu is as follows: + +1. vhost_net_stop() was triggered by guest image. This will call the function +virtio_pci_set_guest_notifiers() with assgin= false, +virtio_pci_set_guest_notifiers() will release the irqfd for vector 0 + +2. virtio_reset() was triggered, this will set configure vector to VIRTIO_NO_VECTOR + +3.vhost_net_start() was called (at this time, the configure vector is +still VIRTIO_NO_VECTOR) and then call virtio_pci_set_guest_notifiers() with +assgin=true, so the irqfd for vector 0 is still not "init" during this process + +4. The system continues to boot and sets the vector back to 0. After that +msix_fire_vector_notifier() was triggered to unmask the vector 0 and meet the crash + +To fix the issue, we need to support changing the vector after VIRTIO_CONFIG_S_DRIVER_OK is set. + +(gdb) bt +0 __pthread_kill_implementation (threadid=, signo=signo@entry=6, no_tid=no_tid@entry=0) + at pthread_kill.c:44 +1 0x00007fc87148ec53 in __pthread_kill_internal (signo=6, threadid=) at pthread_kill.c:78 +2 0x00007fc87143e956 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 +3 0x00007fc8714287f4 in __GI_abort () at abort.c:79 +4 0x00007fc87142871b in __assert_fail_base + (fmt=0x7fc8715bbde0 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5606413efd53 "ret == 0", file=0x5606413ef87d "../accel/kvm/kvm-all.c", line=1837, function=) at assert.c:92 +5 0x00007fc871437536 in __GI___assert_fail + (assertion=0x5606413efd53 "ret == 0", file=0x5606413ef87d "../accel/kvm/kvm-all.c", line=1837, function=0x5606413f06f0 <__PRETTY_FUNCTION__.19> "kvm_irqchip_commit_routes") at assert.c:101 +6 0x0000560640f884b5 in kvm_irqchip_commit_routes (s=0x560642cae1f0) at ../accel/kvm/kvm-all.c:1837 +7 0x0000560640c98f8e in virtio_pci_one_vector_unmask + (proxy=0x560643c65f00, queue_no=4294967295, vector=0, msg=..., n=0x560643c6e4c8) 
+ at ../hw/virtio/virtio-pci.c:1005 +8 0x0000560640c99201 in virtio_pci_vector_unmask (dev=0x560643c65f00, vector=0, msg=...) + at ../hw/virtio/virtio-pci.c:1070 +9 0x0000560640bc402e in msix_fire_vector_notifier (dev=0x560643c65f00, vector=0, is_masked=false) + at ../hw/pci/msix.c:120 +10 0x0000560640bc40f1 in msix_handle_mask_update (dev=0x560643c65f00, vector=0, was_masked=true) + at ../hw/pci/msix.c:140 +11 0x0000560640bc4503 in msix_table_mmio_write (opaque=0x560643c65f00, addr=12, val=0, size=4) + at ../hw/pci/msix.c:231 +12 0x0000560640f26d83 in memory_region_write_accessor + (mr=0x560643c66540, addr=12, value=0x7fc86b7bc628, size=4, shift=0, mask=4294967295, attrs=...) + at ../system/memory.c:497 +13 0x0000560640f270a6 in access_with_adjusted_size + + (addr=12, value=0x7fc86b7bc628, size=4, access_size_min=1, access_size_max=4, access_fn=0x560640f26c8d , mr=0x560643c66540, attrs=...) at ../system/memory.c:573 +14 0x0000560640f2a2b5 in memory_region_dispatch_write (mr=0x560643c66540, addr=12, data=0, op=MO_32, attrs=...) 
+ at ../system/memory.c:1521 +15 0x0000560640f37bac in flatview_write_continue + (fv=0x7fc65805e0b0, addr=4273803276, attrs=..., ptr=0x7fc871e9c028, len=4, addr1=12, l=4, mr=0x560643c66540) + at ../system/physmem.c:2714 +16 0x0000560640f37d0f in flatview_write + (fv=0x7fc65805e0b0, addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4) at ../system/physmem.c:2756 +17 0x0000560640f380bf in address_space_write + (as=0x560642161ae0 , addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4) + at ../system/physmem.c:2863 +18 0x0000560640f3812c in address_space_rw + (as=0x560642161ae0 , addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4, is_write=true) at ../system/physmem.c:2873 +--Type for more, q to quit, c to continue without paging-- +19 0x0000560640f8aa55 in kvm_cpu_exec (cpu=0x560642f205e0) at ../accel/kvm/kvm-all.c:2915 +20 0x0000560640f8d731 in kvm_vcpu_thread_fn (arg=0x560642f205e0) at ../accel/kvm/kvm-accel-ops.c:51 +21 0x00005606411949f4 in qemu_thread_start (args=0x560642f292b0) at ../util/qemu-thread-posix.c:541 +22 0x00007fc87148cdcd in start_thread (arg=) at pthread_create.c:442 +23 0x00007fc871512630 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 +(gdb) + +MST: coding style and typo fixups + +Fixes: f9a09ca3ea ("vhost: add support for configure interrupt") +Cc: qemu-stable@nongnu.org +Signed-off-by: Cindy Lu +Message-ID: <2321ade5f601367efe7380c04e3f61379c59b48f.1713173550.git.mst@redhat.com> +Cc: Lei Yang +Cc: Jason Wang +Signed-off-by: Michael S. 
Tsirkin +Tested-by: Cindy Lu +(cherry picked from commit 2ce6cff94df2650c460f809e5ad263f1d22507c0) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 37 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 35 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index e433879542..08faefe29a 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1424,6 +1424,38 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, + return offset; + } + ++static void virtio_pci_set_vector(VirtIODevice *vdev, ++ VirtIOPCIProxy *proxy, ++ int queue_no, uint16_t old_vector, ++ uint16_t new_vector) ++{ ++ bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) && ++ msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled(); ++ ++ if (new_vector == old_vector) { ++ return; ++ } ++ ++ /* ++ * If the device uses irqfd and the vector changes after DRIVER_OK is ++ * set, we need to release the old vector and set up the new one. ++ * Otherwise just need to set the new vector on the device. ++ */ ++ if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) { ++ kvm_virtio_pci_vector_release_one(proxy, queue_no); ++ } ++ /* Set the new vector on the device. */ ++ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { ++ vdev->config_vector = new_vector; ++ } else { ++ virtio_queue_set_vector(vdev, queue_no, new_vector); ++ } ++ /* If the new vector changed need to set it up. 
*/ ++ if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) { ++ kvm_virtio_pci_vector_use_one(proxy, queue_no); ++ } ++} ++ + int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, + uint8_t bar, uint64_t offset, uint64_t length, + uint8_t id) +@@ -1570,7 +1602,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, + } else { + val = VIRTIO_NO_VECTOR; + } +- vdev->config_vector = val; ++ virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX, ++ vdev->config_vector, val); + break; + case VIRTIO_PCI_COMMON_STATUS: + if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { +@@ -1610,7 +1643,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, + } else { + val = VIRTIO_NO_VECTOR; + } +- virtio_queue_set_vector(vdev, vdev->queue_sel, val); ++ virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val); + break; + case VIRTIO_PCI_COMMON_Q_ENABLE: + if (val == 1) { +-- +2.39.3 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index fbe4f67..21cd617 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.2.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix} +Release: 11%{?rcrel}%{?dist}%{?cc_suffix}.2 # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -610,6 +610,10 @@ Patch175: kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch Patch176: kvm-coroutine-cap-per-thread-local-pool-size.patch # For RHEL-28947 - Qemu crashing with "failed to set up stack guard page: Cannot allocate memory" Patch177: kvm-coroutine-reserve-5-000-mappings.patch +# For RHEL-32837 - qemu-kvm running Vyatta hits assert when doing KVM_SET_GSI_ROUTING [rhel-9.4.z] +Patch178: kvm-virtio-pci-fix-use-of-a-released-vector.patch +# For RHEL-32990 - qemu crash with kvm_irqchip_commit_routes: Assertion `ret == 0' failed if booting with many virtio disks and vcpus [rhel-9.4.z] +Patch179: kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch %if %{have_clang} BuildRequires: clang @@ -1671,6 +1675,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon May 20 2024 Miroslav Rezanina - 8.2.0-11.el9_4.2 +- kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch [RHEL-32990] +- Resolves: RHEL-32990 + (qemu crash with kvm_irqchip_commit_routes: Assertion `ret == 0' failed if booting with many virtio disks and vcpus [rhel-9.4.z]) + +* Thu Apr 18 2024 Miroslav Rezanina - 8.2.0-11.el9_4.1 +- kvm-virtio-pci-fix-use-of-a-released-vector.patch [RHEL-32837] +- Resolves: RHEL-32837 + (qemu-kvm running Vyatta hits assert when doing KVM_SET_GSI_ROUTING [rhel-9.4.z]) + * Tue Mar 26 2024 Miroslav Rezanina - 8.2.0-11 - kvm-coroutine-cap-per-thread-local-pool-size.patch [RHEL-28947] - kvm-coroutine-reserve-5-000-mappings.patch [RHEL-28947]